In this document, we compare the enrichment results from online GREAT and local GREAT. The four datasets are all from the UCSC Table Browser. The parameters are:

clade = Mammal
genome = Human
assembly = GRCh37/hg19
group = Regulation
track = ENCODE 3 TFBS
table: A549 JUN, A549 ELF1, H1-hESC RXRA, GM12878 MYB

And in the “Retrieve and display data” section:

output format = BED - browser extensible data

Then click the button “get output”.

We first read the files into GRanges objects:

# Read a BED file and return its regions as a GRanges object.
#
# f: path to a BED file readable by read.table(); the first three columns
#    are used as chromosome, start and end.
#
# Only regions on chr1-chr22, chrX and chrY are kept. One is added to the
# start column before the ranges are built.
read_bed = function(f) {
    bed = read.table(f)
    main_chr = paste0("chr", c(1:22, "X", "Y"))
    on_main_chr = bed[, 1] %in% main_chr
    bed = bed[on_main_chr, ]
    starts = bed[, 2] + 1
    ends = bed[, 3]
    GRanges(seqnames = bed[, 1], ranges = IRanges(start = starts, end = ends))
}
# Load the four peak sets into a named list; the list names are reused
# below to pick one dataset per section.
grl = lapply(
    c(
        A549_JUN = "data/tb_encTfChipPkENCFF708LCH_A549_JUN_hg19.bed",
        A549_ELF1 = "data/tb_encTfChipPkENCFF533NIV_A549_ELF1_hg19.bed",
        H1_hESC_RXRA = "data/tb_encTfChipPkENCFF369JAI_H1_hESC_RXRA_hg19.bed",
        GM12878_MYB = "data/tb_encTfChipPkENCFF215YWS_GM12878_MYB_hg19.bed"
    ),
    read_bed
)
# Number of input regions per dataset
vapply(grl, length, integer(1))
##     A549_JUN    A549_ELF1 H1_hESC_RXRA  GM12878_MYB 
##         1726        11577         2092         3748

A549_JUN (1726 input regions)

Apply both online and local GREAT analysis. Note that online GREAT excludes gap regions, and in local GREAT gap regions are also removed by default.

# Input regions for this section
gr = grl[["A549_JUN"]]

# Online GREAT: submit the regions and fetch all enrichment tables,
# then keep the GO Biological Process table.
job = submitGreatJob(gr)
tbl = getEnrichmentTables(job)
tb1 = tbl$`GO Biological Process`

# Local GREAT on the same regions with GO:BP gene sets and hg19.
res = great(gr, gene_sets = "GO:BP", tss_source = "hg19")
tb2 = getEnrichmentTable(res)

tb1 and tb2 contain the full table of all GO terms under test. First we take the common GO terms in the two result tables.

# Index both tables by GO ID so rows can be selected by name.
rownames(tb1) = tb1[["ID"]]
rownames(tb2) = tb2[["id"]]
# GO terms present in both the online and the local result
cn = intersect(tb1[["ID"]], tb2[["id"]])
length(cn)
## [1] 4578
# Restrict both tables to the common terms, in the same row order.
tb1 = tb1[cn, ]
tb2 = tb2[cn, ]

Next we take the significant GO terms (adjusted p-value < 0.001) from the two tables and compare them with an Euler diagram.

# GO IDs whose adjusted p-value is below 0.001 in each result table
lt2 = list(
    online = with(tb1, ID[Binom_Adjp_BH < 0.001]),
    local = with(tb2, id[p_adjust < 0.001])
)
# Euler diagram of the two sets, annotated with the set sizes
plot(euler(lt2), quantities = TRUE, main = "A549_JUN")

Next we compare the observed region hits and fold enrichment in the two results.

# Draw the two scatter plots side by side. par() returns the previous
# settings, which are restored afterwards so that later plots are not
# left in the 1 x 2 layout.
old_par = par(mfrow = c(1, 2))
# tb1 and tb2 were both subset with `cn`, so their rows pair up term by term.
plot(tb1$Binom_Observed_Region_Hits, tb2$observed_region_hits, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Observed region hits")
plot(tb1$Binom_Fold_Enrichment, tb2$fold_enrichment, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Fold enrichment")
par(old_par)

Next we compare the two significant GO term lists by clustering them into groups.

# Load the clustering package and silence its progress messages.
library(simplifyEnrichment)
se_opt$verbose = FALSE
# One data frame per source, each pairing GO IDs with adjusted p-values
lt3 = list(
    online = data.frame(id = tb1[["ID"]], p_adjust = tb1[["Binom_Adjp_BH"]]),
    local = data.frame(id = tb2[["id"]], p_adjust = tb2[["p_adjust"]])
)
simplifyGOFromMultipleLists(lt3, padj_cutoff = 0.001)

A549_ELF1 (11577 input regions)

Apply both online and local GREAT analysis:

# Input regions for this section
gr = grl[["A549_ELF1"]]

# Online GREAT: submit the regions and fetch all enrichment tables,
# then keep the GO Biological Process table.
job = submitGreatJob(gr)
tbl = getEnrichmentTables(job)
tb1 = tbl$`GO Biological Process`

# Local GREAT on the same regions with GO:BP gene sets and hg19.
res = great(gr, gene_sets = "GO:BP", tss_source = "hg19")
tb2 = getEnrichmentTable(res)

tb1 and tb2 contain the full table of all GO terms under test. First we take the common GO terms in the two result tables.

# Index both tables by GO ID so rows can be selected by name.
rownames(tb1) = tb1[["ID"]]
rownames(tb2) = tb2[["id"]]
# GO terms present in both the online and the local result
cn = intersect(tb1[["ID"]], tb2[["id"]])
length(cn)
## [1] 7552
# Restrict both tables to the common terms, in the same row order.
tb1 = tb1[cn, ]
tb2 = tb2[cn, ]

Next we take the significant GO terms (adjusted p-value < 0.001) from the two tables and compare them with an Euler diagram.

# GO IDs whose adjusted p-value is below 0.001 in each result table
lt2 = list(
    online = with(tb1, ID[Binom_Adjp_BH < 0.001]),
    local = with(tb2, id[p_adjust < 0.001])
)
# Euler diagram of the two sets, annotated with the set sizes
plot(euler(lt2), quantities = TRUE, main = "A549_ELF1")

Next we compare the observed region hits and fold enrichment in the two results.

# Draw the two scatter plots side by side. par() returns the previous
# settings, which are restored afterwards so that later plots are not
# left in the 1 x 2 layout.
old_par = par(mfrow = c(1, 2))
# tb1 and tb2 were both subset with `cn`, so their rows pair up term by term.
plot(tb1$Binom_Observed_Region_Hits, tb2$observed_region_hits, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Observed region hits")
plot(tb1$Binom_Fold_Enrichment, tb2$fold_enrichment, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Fold enrichment")
par(old_par)

Next we compare the two significant GO term lists by clustering them into groups.

# Load the clustering package and silence its progress messages.
library(simplifyEnrichment)
se_opt$verbose = FALSE
# One data frame per source, each pairing GO IDs with adjusted p-values
lt3 = list(
    online = data.frame(id = tb1[["ID"]], p_adjust = tb1[["Binom_Adjp_BH"]]),
    local = data.frame(id = tb2[["id"]], p_adjust = tb2[["p_adjust"]])
)
simplifyGOFromMultipleLists(lt3, padj_cutoff = 0.001)

H1_hESC_RXRA (2092 input regions)

Apply both online and local GREAT analysis:

# Input regions for this section
gr = grl[["H1_hESC_RXRA"]]

# Online GREAT: submit the regions and fetch all enrichment tables,
# then keep the GO Biological Process table.
job = submitGreatJob(gr)
tbl = getEnrichmentTables(job)
tb1 = tbl$`GO Biological Process`

# Local GREAT on the same regions with GO:BP gene sets and hg19.
res = great(gr, gene_sets = "GO:BP", tss_source = "hg19")
tb2 = getEnrichmentTable(res)

tb1 and tb2 contain the full table of all GO terms under test. First we take the common GO terms in the two result tables.

# Index both tables by GO ID so rows can be selected by name.
rownames(tb1) = tb1[["ID"]]
rownames(tb2) = tb2[["id"]]
# GO terms present in both the online and the local result
cn = intersect(tb1[["ID"]], tb2[["id"]])
length(cn)
## [1] 4694
# Restrict both tables to the common terms, in the same row order.
tb1 = tb1[cn, ]
tb2 = tb2[cn, ]

Next we take the significant GO terms (adjusted p-value < 0.001) from the two tables and compare them with an Euler diagram.

# GO IDs whose adjusted p-value is below 0.001 in each result table
lt2 = list(
    online = with(tb1, ID[Binom_Adjp_BH < 0.001]),
    local = with(tb2, id[p_adjust < 0.001])
)
# Euler diagram of the two sets, annotated with the set sizes
plot(euler(lt2), quantities = TRUE, main = "H1_hESC_RXRA")

Next we compare the observed region hits and fold enrichment in the two results.

# Draw the two scatter plots side by side. par() returns the previous
# settings, which are restored afterwards so that later plots are not
# left in the 1 x 2 layout.
old_par = par(mfrow = c(1, 2))
# tb1 and tb2 were both subset with `cn`, so their rows pair up term by term.
plot(tb1$Binom_Observed_Region_Hits, tb2$observed_region_hits, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Observed region hits")
plot(tb1$Binom_Fold_Enrichment, tb2$fold_enrichment, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Fold enrichment")
par(old_par)

Next we compare the two significant GO term lists by clustering them into groups.

# Load the clustering package and silence its progress messages.
library(simplifyEnrichment)
se_opt$verbose = FALSE
# One data frame per source, each pairing GO IDs with adjusted p-values
lt3 = list(
    online = data.frame(id = tb1[["ID"]], p_adjust = tb1[["Binom_Adjp_BH"]]),
    local = data.frame(id = tb2[["id"]], p_adjust = tb2[["p_adjust"]])
)
simplifyGOFromMultipleLists(lt3, padj_cutoff = 0.001)

GM12878_MYB (3748 input regions)

Apply both online and local GREAT analysis:

# Input regions for this section
gr = grl[["GM12878_MYB"]]

# Online GREAT: submit the regions and fetch all enrichment tables,
# then keep the GO Biological Process table.
job = submitGreatJob(gr)
tbl = getEnrichmentTables(job)
tb1 = tbl$`GO Biological Process`

# Local GREAT on the same regions with GO:BP gene sets and hg19.
res = great(gr, gene_sets = "GO:BP", tss_source = "hg19")
tb2 = getEnrichmentTable(res)

tb1 and tb2 contain the full table of all GO terms under test. First we take the common GO terms in the two result tables.

# Index both tables by GO ID so rows can be selected by name.
rownames(tb1) = tb1[["ID"]]
rownames(tb2) = tb2[["id"]]
# GO terms present in both the online and the local result
cn = intersect(tb1[["ID"]], tb2[["id"]])
length(cn)
## [1] 5775
# Restrict both tables to the common terms, in the same row order.
tb1 = tb1[cn, ]
tb2 = tb2[cn, ]

Next we take the significant GO terms (adjusted p-value < 0.001) from the two tables and compare them with an Euler diagram.

# GO IDs whose adjusted p-value is below 0.001 in each result table
lt2 = list(
    online = with(tb1, ID[Binom_Adjp_BH < 0.001]),
    local = with(tb2, id[p_adjust < 0.001])
)
# Euler diagram of the two sets, annotated with the set sizes
plot(euler(lt2), quantities = TRUE, main = "GM12878_MYB")

Next we compare the observed region hits and fold enrichment in the two results.

# Draw the two scatter plots side by side. par() returns the previous
# settings, which are restored afterwards so that later plots are not
# left in the 1 x 2 layout.
old_par = par(mfrow = c(1, 2))
# tb1 and tb2 were both subset with `cn`, so their rows pair up term by term.
plot(tb1$Binom_Observed_Region_Hits, tb2$observed_region_hits, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Observed region hits")
plot(tb1$Binom_Fold_Enrichment, tb2$fold_enrichment, pch = 16, col = "#00000010",
    xlab = "online GREAT", ylab = "local GREAT", main = "Fold enrichment")
par(old_par)

Next we compare the two significant GO term lists by clustering them into groups.

# Load the clustering package and silence its progress messages.
library(simplifyEnrichment)
se_opt$verbose = FALSE
# One data frame per source, each pairing GO IDs with adjusted p-values
lt3 = list(
    online = data.frame(id = tb1[["ID"]], p_adjust = tb1[["Binom_Adjp_BH"]]),
    local = data.frame(id = tb2[["id"]], p_adjust = tb2[["p_adjust"]])
)
simplifyGOFromMultipleLists(lt3, padj_cutoff = 0.001)

Session info

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C/UTF-8/C/C/C/C
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
## [1] simplifyEnrichment_1.11.1 eulerr_7.0.0             
## [3] rGREAT_2.5.4              GenomicRanges_1.52.1     
## [5] GenomeInfoDb_1.36.4       IRanges_2.36.0           
## [7] S4Vectors_0.40.2          BiocGenerics_0.48.1      
## [9] knitr_1.44               
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3                      
##   [2] jsonlite_1.8.8                          
##   [3] shape_1.4.6                             
##   [4] magrittr_2.0.3                          
##   [5] magick_2.8.0                            
##   [6] GenomicFeatures_1.52.2                  
##   [7] rmarkdown_2.25                          
##   [8] GlobalOptions_0.1.2                     
##   [9] fs_1.6.3                                
##  [10] BiocIO_1.10.0                           
##  [11] zlibbioc_1.46.0                         
##  [12] ragg_1.2.6                              
##  [13] vctrs_0.6.4                             
##  [14] Cairo_1.6-2                             
##  [15] memoise_2.0.1                           
##  [16] Rsamtools_2.16.0                        
##  [17] RCurl_1.98-1.12                         
##  [18] htmltools_0.5.7                         
##  [19] S4Arrays_1.0.6                          
##  [20] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 
##  [21] progress_1.2.2                          
##  [22] curl_5.1.0                              
##  [23] sass_0.4.8                              
##  [24] bslib_0.6.1                             
##  [25] htmlwidgets_1.6.2                       
##  [26] desc_1.4.2                              
##  [27] cachem_1.0.8                            
##  [28] commonmark_1.9.0                        
##  [29] GenomicAlignments_1.36.0                
##  [30] mime_0.12                               
##  [31] lifecycle_1.0.4                         
##  [32] iterators_1.0.14                        
##  [33] pkgconfig_2.0.3                         
##  [34] Matrix_1.6-1.1                          
##  [35] R6_2.5.1                                
##  [36] fastmap_1.1.1                           
##  [37] GenomeInfoDbData_1.2.10                 
##  [38] MatrixGenerics_1.12.3                   
##  [39] shiny_1.8.0                             
##  [40] clue_0.3-65                             
##  [41] digest_0.6.33                           
##  [42] colorspace_2.1-0                        
##  [43] AnnotationDbi_1.62.2                    
##  [44] rprojroot_2.0.3                         
##  [45] textshaping_0.3.7                       
##  [46] RSQLite_2.3.1                           
##  [47] org.Hs.eg.db_3.17.0                     
##  [48] filelock_1.0.2                          
##  [49] fansi_1.0.5                             
##  [50] httr_1.4.7                              
##  [51] polyclip_1.10-6                         
##  [52] abind_1.4-5                             
##  [53] compiler_4.3.1                          
##  [54] bit64_4.0.5                             
##  [55] doParallel_1.0.17                       
##  [56] BiocParallel_1.34.2                     
##  [57] DBI_1.1.3                               
##  [58] biomaRt_2.56.1                          
##  [59] rappdirs_0.3.3                          
##  [60] proxyC_0.3.3                            
##  [61] DelayedArray_0.26.7                     
##  [62] rjson_0.2.21                            
##  [63] tools_4.3.1                             
##  [64] httpuv_1.6.13                           
##  [65] glue_1.6.2                              
##  [66] restfulr_0.0.15                         
##  [67] GOSemSim_2.26.1                         
##  [68] promises_1.2.1                          
##  [69] gridtext_0.1.5                          
##  [70] polylabelr_0.2.0                        
##  [71] cluster_2.1.4                           
##  [72] generics_0.1.3                          
##  [73] hms_1.1.3                               
##  [74] xml2_1.3.6                              
##  [75] utf8_1.2.3                              
##  [76] XVector_0.40.0                          
##  [77] markdown_1.10                           
##  [78] foreach_1.5.2                           
##  [79] pillar_1.9.0                            
##  [80] stringr_1.5.0                           
##  [81] later_1.3.2                             
##  [82] circlize_0.4.15                         
##  [83] dplyr_1.1.3                             
##  [84] BiocFileCache_2.8.0                     
##  [85] lattice_0.21-9                          
##  [86] rtracklayer_1.60.1                      
##  [87] bit_4.0.5                               
##  [88] tidyselect_1.2.0                        
##  [89] GO.db_3.17.0                            
##  [90] ComplexHeatmap_2.18.0                   
##  [91] tm_0.7-11                               
##  [92] Biostrings_2.68.1                       
##  [93] NLP_0.2-1                               
##  [94] SummarizedExperiment_1.30.2             
##  [95] xfun_0.40                               
##  [96] Biobase_2.60.0                          
##  [97] matrixStats_1.2.0                       
##  [98] DT_0.30                                 
##  [99] stringi_1.7.12                          
## [100] yaml_2.3.7                              
## [101] TxDb.Hsapiens.UCSC.hg38.knownGene_3.17.0
## [102] evaluate_0.22                           
## [103] codetools_0.2-19                        
## [104] tibble_3.2.1                            
## [105] cli_3.6.2                               
## [106] RcppParallel_5.1.7                      
## [107] xtable_1.8-4                            
## [108] systemfonts_1.0.5                       
## [109] jquerylib_0.1.4                         
## [110] Rcpp_1.0.11                             
## [111] dbplyr_2.3.4                            
## [112] png_0.1-8                               
## [113] XML_3.99-0.14                           
## [114] parallel_4.3.1                          
## [115] ellipsis_0.3.2                          
## [116] pkgdown_2.0.7                           
## [117] blob_1.2.4                              
## [118] prettyunits_1.2.0                       
## [119] bitops_1.0-7                            
## [120] slam_0.1-50                             
## [121] purrr_1.0.2                             
## [122] crayon_1.5.2                            
## [123] GetoptLong_1.0.5                        
## [124] rlang_1.1.2                             
## [125] KEGGREST_1.40.1