Note in clusterProfiler, p-values are calculated by gene permutations!

We use a dataset gene_diff_score which is a vector of a certain metric for differential expression of genes.

gene_diff_score = readRDS(system.file("extdata", "gene_diff_score.rds", package = "GSEAtraining"))
head(gene_diff_score)
##    CDKN1A       BAX      MDM2      DDB2     STAT6     NTSR2 
## 0.8425507 0.8232428 0.6306318 0.5945239 0.5791271 0.5712063

We need to convert genes from symbols to EntreZ IDs.

library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
##     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     I, expand.grid, unname
## 
map = mapIds(org.Hs.eg.db, keys = names(gene_diff_score), keytype = "SYMBOL", column = "ENTREZID")
## 'select()' returned 1:many mapping between keys and columns
head(map)
##  CDKN1A     BAX    MDM2    DDB2   STAT6   NTSR2 
##  "1026"   "581"  "4193"  "1643"  "6778" "23620"
# names(gene_diff_score) = map[ names(gene_diff_score) ]

For this dataset, the mapping is 1:1. But you need to be careful when the mapping is not 1:1.

Here we use the convert_to_entrez_id() function from the GSEAtraining package, which automatically takes care of multiple mappings:

library(GSEAtraining)
gene_diff_score = convert_to_entrez_id(gene_diff_score)
##   gene id might be SYMBOL (p =  1.000 )
## 'select()' returned 1:many mapping between keys and columns
head(gene_diff_score)
##          10         100        1000       10000       10001       10002 
##  0.02683074  0.12681435 -0.13737449  0.13335944 -0.18805099  0.12855168

Load the clusterProfiler package.

Next we perform GSEA on different gene set sources.

Before we start, gene_diff_score must be pre-sorted.

gene_diff_score = sort(gene_diff_score, decreasing = TRUE)

GO enrichment

library(org.Hs.eg.db)
tb = gseGO(geneList = gene_diff_score, ont = "BP", OrgDb = org.Hs.eg.db)
head(tb)
##                    ID                                               Description
## GO:0007186 GO:0007186              G protein-coupled receptor signaling pathway
## GO:0098662 GO:0098662                  inorganic cation transmembrane transport
## GO:0098655 GO:0098655                 monoatomic cation transmembrane transport
## GO:0030001 GO:0030001                                       metal ion transport
## GO:0045087 GO:0045087                                    innate immune response
## GO:1902850 GO:1902850 microtubule cytoskeleton organization involved in mitosis
##            setSize enrichmentScore       NES       pvalue     p.adjust
## GO:0007186     472       0.3655642  1.870331 1.754647e-10 8.860969e-07
## GO:0098662     420       0.3312151  1.685890 8.390071e-07 1.412329e-03
## GO:0098655     426       0.3312117  1.684925 7.347250e-07 1.412329e-03
## GO:0030001     484       0.3237042  1.659821 1.231619e-06 1.554918e-03
## GO:0045087     469       0.3202448  1.638181 1.684740e-06 1.701588e-03
## GO:1902850      69      -0.4809040 -2.203288 2.295757e-06 1.932262e-03
##                  qvalue rank                   leading_edge
## GO:0007186 7.775858e-07 1687 tags=39%, list=25%, signal=31%
## GO:0098662 1.239375e-03 1934 tags=39%, list=29%, signal=30%
## GO:0098655 1.239375e-03 1934 tags=39%, list=29%, signal=30%
## GO:0030001 1.364504e-03 1917 tags=38%, list=29%, signal=29%
## GO:0045087 1.493212e-03 1450 tags=30%, list=22%, signal=26%
## GO:1902850 1.695638e-03 1730 tags=51%, list=26%, signal=38%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     core_enrichment
## GO:0007186 23620/26033/150/9248/23295/2151/6870/3060/3362/4544/2911/6357/6714/6753/3351/6608/815/1113/6366/3360/26532/7432/2842/8685/181/5148/1237/19/9934/680/51704/729230/5573/247/8477/8601/112/107/2775/6370/2919/2099/1812/3358/4157/2783/6344/5144/5741/887/146/1909/5138/1814/5368/4985/134/186/2642/10636/348/29909/88/8851/611/23683/5132/335/1608/4161/3357/3363/11248/6002/6362/9718/7434/2841/2185/6375/2833/3630/623/10692/22844/2219/2780/4935/5330/5294/6360/153/8644/8620/5332/2788/6354/6356/4829/954/135948/113/2792/2253/4987/56288/11069/3248/10266/1258/10672/7275/2843/6364/10268/115/6900/2695/6846/776/4158/1672/3640/26648/6014/2918/345/8484/1816/5140/3000/3558/5995/7409/8111/22986/11247/56848/152/5141/7849/3717/7301/6363/5697/6622/2853/1910/109/1269/9138/7932/2798/6295/9826/9590/1230/6754/1606/6001/6915/6368/916/5996/9368/6755/5443/4852/719/2914/8527/5367/2870/6011/6010/9283/4308/108/3356/6387/2691/10798
## GO:0098662                                                                                          581/8912/2906/8514/2040/150/7225/596/5371/3786/5027/6524/6553/6568/4544/6556/23479/343/527/845/6366/859/783/3756/6539/10786/6540/8913/2745/51606/81614/1812/3358/25800/6530/2783/486/5144/55117/9104/6334/1909/1814/5336/3758/4891/90134/26133/5319/88/5581/6262/3357/9254/9512/3208/967/6528/2185/6375/3738/3780/478/3777/623/3755/6335/5330/5294/9446/5664/6564/5170/825/6263/9472/5332/23327/3749/6569/6340/9478/1020/777/931/7222/9498/4987/5587/10021/3764/1535/526/10268/10063/776/5024/528/3458/5250/5026/10058/5534/10246/6534/610/11194/4734/5774/56848/491/7386/6363/6622/482/4842/1910/8671/309/64116/6531/6546/9368/1340/3772/9296/1327/2903/4308/535/3709/3356/2905/3741/1497/6236/9001/6548/4283/8989/5348/10050/6574/774/57468/6557/7779/5142/6834/9114/1236/495/3782/9187/3768/2056/2815/5354/3359/3785/5788/6833/6520/117/9550
## GO:0098655                                                                               581/8912/2906/8514/2040/150/7225/596/5371/3786/5027/6524/6553/6568/4544/6556/23479/527/845/6366/859/783/3756/6539/10786/6540/8913/2745/51606/81614/1812/3358/25800/6530/1145/2783/486/5144/55117/9104/6334/1909/1814/5336/3758/1520/4891/90134/26133/5319/88/5581/6262/3357/9254/9512/3208/967/6528/2185/6375/3738/3780/478/3777/623/3755/6335/5330/5294/9446/5664/6564/5170/825/6263/9472/5332/23327/3749/6569/6340/9478/1020/777/931/7222/9498/4987/1258/5587/10021/3764/1535/526/10268/10063/776/5024/528/3458/5250/5026/10058/5534/10246/6534/610/11194/4734/5774/56848/491/4318/7386/6363/6622/482/4842/1910/8671/309/64116/6531/6546/9368/1340/3772/9296/1327/2903/4308/535/3709/3356/2905/3741/1497/6236/9001/6548/4283/8989/5348/10050/6574/774/57468/6557/7779/5142/6834/9114/1236/495/3782/9187/3768/2056/2815/5354/3785/5788/6833/6520/117/9550
## GO:0030001   581/8912/2906/8514/2040/150/7225/596/5371/3786/5027/3060/6524/6553/6568/4544/3718/6556/1017/23479/815/9058/920/845/6366/7432/816/859/783/3756/1272/6539/10786/6540/8913/3263/9962/5187/2745/81614/1812/3358/25800/6530/2783/486/5144/55117/9104/6334/7369/1909/1814/5336/3758/4985/134/145389/4891/90134/26133/5319/88/5581/6262/3357/9254/9512/3208/967/6528/2185/6375/3738/3780/2702/478/3777/623/3755/7178/6335/5330/5294/3077/9446/5664/5170/825/6263/9472/5332/23327/3749/6569/6340/475/2512/5743/9478/1020/777/931/7222/9498/4987/10266/5587/10021/3764/1535/10268/10063/776/5024/3458/3273/5026/10058/8884/5534/10246/6534/610/11194/4734/5774/56848/491/3603/6363/6622/482/4842/1910/8671/309/64116/6531/6546/1230/9368/4504/3772/1213/2903/2288/4308/3709/3356/6387/1447/2905/3741/6236/9001/6548/6527/4283/8989/5348/10050/6574/468/774/57468/6557/7779/5142/1236/495/3782/9187/3768/2056/2815/5354/3785/5788/6833/6520/1141
## GO:0045087                                                                                                                                                                                      3140/5802/3439/22841/5371/10475/1668/1436/3684/10346/259197/10584/3565/6223/23643/653361/3718/6357/6556/6714/1113/200316/7124/923/6366/4063/8685/6039/10666/9641/2813/3663/8942/3428/337/3441/4814/3263/27350/729/6370/5648/4321/2099/383/2213/713/71/9261/3662/7369/5336/6441/1520/7376/5063/3448/5319/348/10623/5581/6733/7726/4282/640/3456/6362/9051/64135/10333/2207/3002/6375/3630/10155/2219/914/5294/841/2671/735/6360/8764/5170/7294/639/10865/3055/7096/5988/6354/3805/1660/6356/4829/3848/10225/26191/5608/7128/3592/6318/9450/9958/5587/4360/3551/1535/6364/6036/6846/1672/9437/7535/3458/4688/8876/4435/9447/3113/7409/5770/3556/3146/3329/2268/10747/8767/7454/1380/3430/3717/7301/6363/6622/731/23138/11326/733/4332/3442/60489/6672
## GO:1902850                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      991/5347/2316/7415/3303/4926/3925/23636/4134/1058/22974/2801/7283/9493/3833/8480/387/22919/4204/332/55968/4085/9525/10252/23291/8766/3837/891/10253/7272/9183/10735/4751/9371/29899

KEGG enrichment

tb = gseKEGG(geneList = gene_diff_score, organism = "hsa")
head(tb)
##                ID                             Description setSize
## hsa04080 hsa04080 Neuroactive ligand-receptor interaction     233
## hsa03010 hsa03010                                Ribosome      85
## hsa04060 hsa04060  Cytokine-cytokine receptor interaction     192
## hsa04940 hsa04940                Type I diabetes mellitus      36
## hsa04514 hsa04514                 Cell adhesion molecules      98
## hsa05332 hsa05332               Graft-versus-host disease      29
##          enrichmentScore      NES       pvalue     p.adjust       qvalue rank
## hsa04080       0.4123718 1.986385 7.213194e-09 2.344288e-06 1.913395e-06 2404
## hsa03010       0.5118898 2.168882 2.504863e-07 4.070403e-05 3.322240e-05 2348
## hsa04060       0.3894784 1.836697 1.930874e-06 2.091780e-04 1.707299e-04 1812
## hsa04940       0.5967598 2.135543 1.863607e-05 1.514181e-03 1.235866e-03 1530
## hsa04514       0.4276091 1.847140 5.565222e-05 3.617394e-03 2.952497e-03 1804
## hsa05332       0.6066844 2.060975 1.315649e-04 7.126434e-03 5.816555e-03 1762
##                            leading_edge
## hsa04080 tags=52%, list=36%, signal=35%
## hsa03010 tags=60%, list=35%, signal=40%
## hsa04060 tags=45%, list=27%, signal=34%
## hsa04940 tags=56%, list=23%, signal=43%
## hsa04514 tags=45%, list=27%, signal=33%
## hsa05332 tags=62%, list=26%, signal=46%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              core_enrichment
## hsa04080 23620/2906/150/2901/9248/2151/6870/7425/5027/3060/3362/4544/2911/6753/3351/3360/7432/1907/9934/680/1812/3358/1145/4157/6344/5741/887/8973/146/1909/1814/5368/4985/134/186/2562/2642/551/4161/3357/3363/7434/623/4295/153/8620/2689/4829/885/2558/3001/4987/56288/2554/2695/4158/3640/5024/2918/8484/1816/6013/2688/1511/5026/2560/152/1142/5697/7200/2569/1910/1269/3953/2798/6754/6915/7068/6755/5443/4852/719/2914/5367/2903/3356/2905/2691/5173/4887/3827/1141/117/2742/2899/2561/5739/6019/1813/1392/1908/727/2690/3972/2912/2897/2692/3375/796/554/4889/1144/6752/9002/5724/3952/4988/8001/5729/1136/2890/5025
## hsa03010                                                                                                                                                                                                                                                                                                                                                    6223/6171/6192/6232/6175/6137/6210/6143/6176/6169/6182/6128/9349/6122/6139/6168/6187/6224/6147/6160/6124/6209/9801/6218/6155/6150/6152/6194/6158/6228/6235/6233/6129/6234/6173/23521/6201/6156/6193/6132/65005/6222/6191/6157/6204/6142/6144/9045/6203/6183/4736
## hsa04060                                                                                                                                                                       355/3439/9518/9466/1436/3552/3623/3565/8744/6357/7124/1896/920/6366/353500/1237/729230/3441/6372/6370/7293/2919/3598/3568/10563/3448/2920/1439/1440/10850/3456/6362/4050/6375/3560/2833/1271/6360/1441/8764/5008/943/2689/6354/3570/6356/3566/3553/4055/3592/7066/6364/6846/3458/1270/3595/93/3558/2688/91/3556/7046/3554/8600/3603/7850/6363/3574/11009/3600/3953/3442/8718/1230/6368/8793/9173/3590/6387/3624/3455/4283/3562/8795/2662/8794
## hsa04940                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  355/3552/7124/3109/3111/3002/3630/5551/3553/5798/3592/3115/3119/5799/3458/3558/3113/3329/3108/3122
## hsa04514                                                                                                                                                                                                                                                                                                                                                                                         51208/5802/3684/4897/23562/923/920/1272/10666/3109/8174/4099/947/5175/29851/3111/3683/6404/3680/914/5789/5133/9378/22865/1003/3115/3897/3119/6900/7122/3385/933/3113/3695/6401/3108/6383/3122/6402/7412/9019/3125/4359/1013
## hsa05332                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          355/3552/7124/3109/3111/3002/5551/3553/3115/115653/3119/3458/3558/3113/3108/3122/3812/3125

Reactome enrichment

## ReactomePA v1.44.0  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
tb = gsePathway(geneList = gene_diff_score, organism = "human")
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
head(tb)
##                          ID                          Description setSize
## R-HSA-373076   R-HSA-373076 Class A/1 (Rhodopsin-like receptors)     198
## R-HSA-69278     R-HSA-69278                  Cell Cycle, Mitotic     275
## R-HSA-1640170 R-HSA-1640170                           Cell Cycle     347
## R-HSA-500792   R-HSA-500792                  GPCR ligand binding     268
## R-HSA-68886     R-HSA-68886                              M Phase     171
## R-HSA-388396   R-HSA-388396           GPCR downstream signalling     403
##               enrichmentScore       NES       pvalue     p.adjust       qvalue
## R-HSA-373076        0.4403472  2.070035 2.532979e-09 1.230082e-06 1.009824e-06
## R-HSA-69278        -0.3427274 -1.946058 2.810546e-09 1.230082e-06 1.009824e-06
## R-HSA-1640170      -0.3244639 -1.885011 1.103254e-09 1.230082e-06 1.009824e-06
## R-HSA-500792        0.4002429  1.944084 7.824351e-09 2.568343e-06 2.108457e-06
## R-HSA-68886        -0.3745608 -1.990677 2.236504e-07 4.195042e-05 3.443879e-05
## R-HSA-388396        0.3456868  1.737108 2.163017e-07 4.195042e-05 3.443879e-05
##               rank                   leading_edge
## R-HSA-373076  1666 tags=43%, list=25%, signal=33%
## R-HSA-69278   1565 tags=38%, list=23%, signal=30%
## R-HSA-1640170 1750 tags=39%, list=26%, signal=30%
## R-HSA-500792  1681 tags=40%, list=25%, signal=31%
## R-HSA-68886   1565 tags=40%, list=23%, signal=31%
## R-HSA-388396  1681 tags=37%, list=25%, signal=30%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   core_enrichment
## R-HSA-373076                                                                                                                                                                                                                                                                                                                                    23620/150/2151/6870/3060/3362/4544/6357/6753/3351/6366/3360/1907/1237/9934/680/729230/8477/6372/6370/2919/1812/3358/4157/887/146/1909/1814/10563/5368/4985/134/186/2920/611/551/4161/3357/10850/3363/9718/2841/6375/2833/623/10692/4935/6360/4295/153/8620/6354/6356/4829/885/4987/6364/6846/4158/3640/8484/1816/5995/8111/152/6363/5697/7200/2853/1910/1269/2798/1230/6754/6915/6368/6755/5443/4852/719/5367/6010/9283/3356/6387
## R-HSA-69278                                                                                                                                                                                                                4926/8317/11130/7443/10213/7514/140609/23636/9184/1875/1032/701/23511/7027/1063/1457/1058/22974/23198/5594/4000/9491/2801/4218/890/8815/7283/699/9493/5934/10733/5690/994/5700/5982/91754/5685/1460/7153/2237/5981/5711/8480/10844/22919/5519/3161/332/5529/5695/4085/1859/5706/9232/990/25836/5905/203068/5933/23291/5683/54107/2010/23279/7341/6117/5721/5928/1031/3837/10121/7324/891/7531/10133/902/996/1719/5528/1030/5686/5702/8556/5500/5720/7465/23649/995/23133/3065/5753/9183/6118/11065/10735/5689/9663/4751/1029/2033/11004/5527/5526/9631
## R-HSA-1640170                                                    1022/4171/5693/991/5347/6847/6119/472/5687/5520/10714/5433/6502/4926/8317/11130/7443/10213/7514/140609/7014/23636/9184/1875/1032/701/23511/7027/4438/1063/1457/7529/1058/22974/23198/5594/4000/9491/2801/4218/890/1736/8815/1616/5932/7283/699/9493/5436/5934/10733/5432/5690/994/5700/5431/5982/91754/5685/1460/7153/2237/5981/5711/8480/10844/3364/22919/5519/3161/2810/332/5529/5695/4085/1859/5706/9232/990/25836/5905/203068/5933/11339/23291/5683/54107/2010/23279/7341/6117/5721/5928/546/1031/3837/10121/7324/891/7531/10133/902/996/1719/5528/7156/1030/5686/5702/8556/5500/5720/7465/23649/995/23133/3065/5753/9183/6118/11065/10735/5537/1111/5689/5437/9663/4751/1029/2033/11004/5527/5526/5810/9631
## R-HSA-500792                                                                                                                                                                                                                        23620/150/2151/6870/3060/3362/4544/2911/6357/6753/3351/6608/6366/3360/7432/1907/1237/9934/680/729230/8477/6372/6370/2919/1812/3358/4157/2783/6344/5741/887/146/1909/1814/10563/5368/50846/4985/134/186/2642/2920/611/551/4161/3357/6469/10850/3363/7482/9718/7434/2841/6375/2833/7476/623/10692/4935/6360/4295/153/8620/2788/6354/6356/4829/885/7475/2792/4987/10266/6364/10268/2695/6846/4158/3640/2918/8484/1816/5995/8111/152/6363/5697/7200/2853/1910/1269/2798/1230/6754/6915/6368/6755/5443/4852/719/2914/5367/6010/9283/3356/6387/2691
## R-HSA-68886                                                                                                                                                                                                                                                                                                                                                                                                    4926/11130/7443/10213/7514/140609/23636/9184/701/23511/1063/1457/1058/23198/5594/4000/9491/2801/8815/7283/699/9493/10733/5690/5700/91754/5685/1460/5711/8480/10844/22919/5519/332/5529/5695/4085/5706/9232/25836/5905/203068/5683/2010/23279/7341/5721/3837/10121/7324/891/7531/996/5528/5686/5702/5720/23133/9183/11065/10735/5689/9663/4751/11004/5527/5526/9631
## R-HSA-388396  23620/5136/150/7225/2151/6870/3060/3362/4544/2911/6357/6714/6753/3351/815/6366/3360/7432/1907/816/1237/9934/680/729230/5573/8477/8601/112/107/6372/6370/2919/1812/3358/4157/2783/6344/5144/5741/887/146/1909/5138/1814/10563/5368/5321/4985/134/186/5568/9639/2642/2920/10636/611/5581/1608/551/4161/3357/10850/3363/6002/5153/7434/2841/6375/2833/623/10692/2780/4935/5330/5294/63940/6360/4295/153/5170/8620/5332/2788/4829/885/113/1020/7222/2792/4987/10266/10672/2843/7410/6364/10268/115/2695/6846/4158/3640/2918/5580/8484/1816/5140/5995/7409/8111/5534/814/152/5141/6363/5697/7200/2853/1910/109/388/1269/9138/4168/2798/9826/1230/6754/1606/10276/6001/6915/6368/5996/6755/5443/4852/719/2914/8527/5367/2870/6010/9283/108/8503/3709/3356/6387/10235/2691

DO enrichment

## DOSE v3.26.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
tb = gseDO(geneList = gene_diff_score)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
head(tb)
##                    ID          Description setSize enrichmentScore      NES
## DOID:2841   DOID:2841               asthma     252       0.3901582 1.883531
## DOID:1176   DOID:1176    bronchial disease     260       0.3782700 1.833781
## DOID:10952 DOID:10952            nephritis     242       0.3680361 1.770792
## DOID:7148   DOID:7148 rheumatoid arthritis     434       0.3221059 1.636490
## DOID:2921   DOID:2921   glomerulonephritis     192       0.3752704 1.755029
## DOID:417     DOID:417   autoimmune disease     420       0.3126948 1.584362
##                  pvalue     p.adjust       qvalue rank
## DOID:2841  5.973397e-08 0.0000657671 0.0000533833 1653
## DOID:1176  6.419932e-07 0.0003534172 0.0002868696 1627
## DOID:10952 3.699123e-06 0.0010181835 0.0008264618 1613
## DOID:7148  3.228887e-06 0.0010181835 0.0008264618 1704
## DOID:2921  2.071252e-05 0.0040932101 0.0033224680 1461
## DOID:417   2.230632e-05 0.0040932101 0.0033224680 1762
##                              leading_edge
## DOID:2841  tags=37%, list=25%, signal=29%
## DOID:1176  tags=36%, list=24%, signal=29%
## DOID:10952 tags=36%, list=24%, signal=28%
## DOID:7148  tags=34%, list=25%, signal=27%
## DOID:2921  tags=35%, list=22%, signal=28%
## DOID:417   tags=34%, list=26%, signal=27%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  core_enrichment
## DOID:2841                                                                                                                                                                                                                                                                                              581/6778/355/6775/2100/3439/596/3552/5027/3623/3565/1215/6556/7124/7432/4524/259/1237/729230/2919/383/3598/3568/4056/5321/6441/134/186/6440/1439/1440/3315/4282/10850/6362/2181/3111/10333/6375/3560/2833/6404/2212/623/26762/5294/864/8764/28/5551/7096/6354/6580/6356/5743/3566/3553/3592/6318/3115/3119/1535/6364/4589/3067/6036/1672/3458/3240/3558/3113/6317/3329/4318/3603/6363/3574/4842/1910/3162/3600/7056/3631/3702/6915/6772/3122/6439/719/5367/9173/5265/2155
## DOID:1176                                                                                                                                                                                                                                                                                         581/6778/355/6775/2100/3439/596/3552/5027/3623/3565/1215/6556/7124/7432/4524/259/1237/729230/2919/383/3598/3568/4056/5321/6441/134/186/6440/1439/1440/3315/4282/10850/6362/2181/3111/10333/6375/4317/3560/2833/6404/2212/623/26762/5294/864/8764/28/5551/7096/6354/6580/6340/6356/5743/3566/3553/3592/6318/3115/3119/1535/6364/4589/3067/6036/1672/3458/3240/3558/3113/6317/3329/4318/3603/6363/3574/4842/1910/3162/3600/7056/3631/3702/6915/6772/3122/6439/719/5367/9173/5265
## DOID:10952                                                                                                                                                                                                                                                                                                                                1026/355/4653/6775/3439/7225/1436/3552/5027/3684/3565/1017/1113/7124/920/3164/4524/3818/19/8654/3663/729/7293/4321/2213/3568/10563/4056/2153/7490/186/348/5175/5076/3315/4282/1286/6469/6362/3683/1843/2185/6375/2833/2702/2212/4311/623/1285/119/26762/841/5551/7096/6354/6696/6356/5133/5743/231/3553/931/3119/4043/6364/2215/4868/345/2204/3458/285/894/6635/7454/4318/22925/7436/4842/6401/55806/7450/6402/3815/1773/719/4548/4489
## DOID:7148  1026/4193/355/6775/2100/596/5371/9518/1668/1436/4772/3552/5027/3623/3684/4118/3565/8744/653361/4544/6357/6556/8190/1113/7124/920/6366/4524/9641/3109/3663/729230/1836/337/5609/9420/6370/383/2213/3481/4481/10563/5321/6441/1520/8828/348/335/29851/3315/4282/3456/6362/9051/5783/64135/967/8651/4050/6528/7434/3002/6573/6375/4317/3560/2833/2212/1513/7178/914/5294/3077/8764/639/4689/5008/943/8454/6354/3570/6696/6356/1544/4286/5133/9047/5743/3566/3553/4055/26191/7128/931/10266/3119/6364/2215/843/2204/7535/3385/3458/5580/4688/3240/3558/11278/3146/4350/3554/3329/56848/2034/4318/8600/3603/3717/3574/3162/11009/1269/3600/6401/3953/896/1280/7056/5657/3108/8718/1230/6368/4170/8793/1773/5979/2821/4855/2870/5265/4249/3709/671/3356/6387/3820/7412/3624
## DOID:2921                                                                                                                                                                                                                                                                                                                                                                                                                                  355/4653/6775/3439/7225/1436/3552/5027/3684/3565/1017/1113/7124/3164/4524/3818/19/8654/729/4321/2213/3568/4056/2153/7490/186/348/5175/5076/3315/4282/1286/6469/6362/2185/6375/2702/2212/4311/623/1285/119/26762/5551/7096/6354/6696/5133/231/3553/931/3119/4043/6364/4868/345/2204/3458/285/6635/7454/4318/22925/7436/6401/55806/7450
## DOID:417                                        1026/581/355/6775/2100/3439/596/1436/3552/5027/3060/3684/3565/653361/6556/7124/923/920/6366/4524/4063/27040/3818/213/9021/5172/10666/1118/3663/43/7306/3428/337/2315/8174/1588/7293/7299/4321/2099/383/1145/2213/10563/5368/6441/4985/348/335/29851/10544/5069/2705/3315/4282/4593/640/3456/64135/1843/6528/2185/3002/6375/3560/2833/3738/230/3630/6285/3777/2212/368/5521/841/4295/639/6097/5551/3570/6356/2936/5133/5743/3566/8034/3553/26191/5445/3592/4987/3115/23385/3119/7295/2215/6846/4158/6649/345/2204/3458/10750/3240/3558/2688/3146/8678/3329/814/491/4318/1380/3603/3574/1638/3162/3600/6401/7056/5657/326/4345/6772/3122/5443/6402/1773/4548/5367/4855/5265/6387/3726/2691/4780/7412/868/4283/7035/10125/3562/3125

MSigDB enrichment

There is no built-in function specific for MSigDB gene sets, but there is a universal function GSEA() which accepts manually-specified gene sets. The gene sets object is simply a two-column data frame:

  • the first column is the gene set ID
  • the second column is the gene ID
library(msigdbr)
gene_sets = msigdbr(category = "H")
map = gene_sets[, c("gs_name", "entrez_gene")]
map$entrez_gene = as.character(map$entrez_gene)

tb = GSEA(geneList = gene_diff_score, TERM2GENE = map)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
head(tb)
##                                                        ID
## HALLMARK_G2M_CHECKPOINT           HALLMARK_G2M_CHECKPOINT
## HALLMARK_ALLOGRAFT_REJECTION HALLMARK_ALLOGRAFT_REJECTION
## HALLMARK_E2F_TARGETS                 HALLMARK_E2F_TARGETS
## HALLMARK_UV_RESPONSE_DN           HALLMARK_UV_RESPONSE_DN
## HALLMARK_MITOTIC_SPINDLE         HALLMARK_MITOTIC_SPINDLE
## HALLMARK_PROTEIN_SECRETION     HALLMARK_PROTEIN_SECRETION
##                                               Description setSize
## HALLMARK_G2M_CHECKPOINT           HALLMARK_G2M_CHECKPOINT     142
## HALLMARK_ALLOGRAFT_REJECTION HALLMARK_ALLOGRAFT_REJECTION     168
## HALLMARK_E2F_TARGETS                 HALLMARK_E2F_TARGETS     129
## HALLMARK_UV_RESPONSE_DN           HALLMARK_UV_RESPONSE_DN     122
## HALLMARK_MITOTIC_SPINDLE         HALLMARK_MITOTIC_SPINDLE     116
## HALLMARK_PROTEIN_SECRETION     HALLMARK_PROTEIN_SECRETION      80
##                              enrichmentScore       NES       pvalue
## HALLMARK_G2M_CHECKPOINT           -0.4110969 -2.133082 2.316384e-08
## HALLMARK_ALLOGRAFT_REJECTION       0.3851576  1.778851 2.320703e-05
## HALLMARK_E2F_TARGETS              -0.3557647 -1.806824 4.895813e-05
## HALLMARK_UV_RESPONSE_DN           -0.3540220 -1.796953 8.094755e-05
## HALLMARK_MITOTIC_SPINDLE          -0.3665070 -1.829145 1.027685e-04
## HALLMARK_PROTEIN_SECRETION        -0.3611803 -1.699317 1.637244e-03
##                                  p.adjust       qvalue rank
## HALLMARK_G2M_CHECKPOINT      1.158192e-06 9.265537e-07 1751
## HALLMARK_ALLOGRAFT_REJECTION 5.801758e-04 4.641407e-04 1530
## HALLMARK_E2F_TARGETS         8.159688e-04 6.527750e-04 1740
## HALLMARK_UV_RESPONSE_DN      1.011844e-03 8.094755e-04 1557
## HALLMARK_MITOTIC_SPINDLE     1.027685e-03 8.221477e-04 1717
## HALLMARK_PROTEIN_SECRETION   1.364370e-02 1.091496e-02 1369
##                                                leading_edge
## HALLMARK_G2M_CHECKPOINT      tags=46%, list=26%, signal=35%
## HALLMARK_ALLOGRAFT_REJECTION tags=38%, list=23%, signal=30%
## HALLMARK_E2F_TARGETS         tags=40%, list=26%, signal=30%
## HALLMARK_UV_RESPONSE_DN      tags=38%, list=23%, signal=30%
## HALLMARK_MITOTIC_SPINDLE     tags=42%, list=26%, signal=32%
## HALLMARK_PROTEIN_SECRETION   tags=32%, list=20%, signal=26%
##                                                                                                                                                                                                                                                                                                                                                  core_enrichment
## HALLMARK_G2M_CHECKPOINT      2176/4171/4678/991/5347/6558/7040/4926/8317/2130/4288/7514/3925/4691/9184/7027/1063/6541/1058/22974/4853/11104/1164/8451/1033/3151/890/1736/3609/699/9493/5887/10951/1432/10733/994/7153/3364/3161/332/4085/4212/9232/990/5933/546/1031/3837/6599/8065/996/4082/2935/3799/23649/7272/8819/6118/1756/11065/1111/4751/11004/1810/7290
## HALLMARK_ALLOGRAFT_REJECTION          355/6775/9466/3565/6223/6357/7124/920/3109/729230/43/8477/2213/3662/10563/1520/3111/3683/8651/4050/10333/3002/3560/2833/914/4689/5551/7096/6354/6356/3848/10225/3566/3553/3001/3059/3592/9450/3551/822/9437/7535/3458/7163/3558/894/4830/2268/8767/7454/4318/3603/3717/6363/3574/3600/896/3108/3702/1230/924/916/6772/3122
## HALLMARK_E2F_TARGETS                                                                   4171/4678/991/5347/6119/10714/7884/4288/7514/3925/701/10527/7037/1164/1033/10635/7283/3609/5395/5631/23468/10733/1786/994/5982/7153/5981/7913/6749/3364/3161/332/4085/9232/9833/203068/5902/6117/10606/7398/1031/2935/7465/23649/9319/9183/6118/1111/1029/5511/11004/5810
## HALLMARK_UV_RESPONSE_DN                                                                                                    657/2869/8476/4325/140609/50937/7049/9112/3397/6541/4853/5376/5999/23576/4092/1432/4131/8405/120/273/857/1739/4781/1859/25836/11099/302/4052/546/3488/2064/5737/1301/29970/51460/5468/323/594/2202/2650/8871/23469/9975/136/2152/5195
## HALLMARK_MITOTIC_SPINDLE                                                                                  6709/998/5347/2316/9771/2317/4926/23095/8476/10006/6654/1063/382/23637/23647/22974/4853/11104/4627/394/9344/7414/10928/613/699/9493/7153/5981/1739/10844/81/6093/22919/332/2017/10435/11135/7531/8936/996/667/4082/3799/7272/10013/4751/11004/832/9371
## HALLMARK_PROTEIN_SECRETION                                                                                                                                                                                                    9367/9871/10059/5594/6197/23673/5868/1956/538/1182/8774/8027/9525/664/372/11079/26286/27236/8773/9805/7251/667/9183/3920/9341/3998

Example for other organism

  1. GO enrichment

gseGO() accepts an OrgDb object as the source of the GO gene sets. We take pig as an example.

library(org.Ss.eg.db)
## 
all_genes = keys(org.Ss.eg.db, keytype = "ENTREZID")
scores = sort(rnorm(length(all_genes)), decreasing = TRUE)
names(scores) = sample(all_genes)
tb = gseGO(geneList = scores, ont = "BP", OrgDb = org.Ss.eg.db, pvalueCutoff = 1)
head(tb)
##                    ID
## GO:0048701 GO:0048701
## GO:0120163 GO:0120163
## GO:0002526 GO:0002526
## GO:0043618 GO:0043618
## GO:0048806 GO:0048806
## GO:0035567 GO:0035567
##                                                                                  Description
## GO:0048701                                          embryonic cranial skeleton morphogenesis
## GO:0120163                                 negative regulation of cold-induced thermogenesis
## GO:0002526                                                       acute inflammatory response
## GO:0043618 regulation of transcription from RNA polymerase II promoter in response to stress
## GO:0048806                                                             genitalia development
## GO:0035567                                               non-canonical Wnt signaling pathway
##            setSize enrichmentScore       NES       pvalue  p.adjust    qvalue
## GO:0048701      12       0.7622172  2.034901 0.0002018079 0.4655543 0.4655543
## GO:0120163      10      -0.7928083 -2.022620 0.0002995446 0.4655543 0.4655543
## GO:0002526      30      -0.5683848 -1.956300 0.0004128475 0.4655543 0.4655543
## GO:0043618      11      -0.7249587 -1.898092 0.0007451933 0.6302472 0.6302472
## GO:0048806      13      -0.7051454 -1.959884 0.0013236795 0.8107510 0.8107510
## GO:0035567      13      -0.6937164 -1.928118 0.0020533641 0.8107510 0.8107510
##            rank                   leading_edge
## GO:0048701 5989 tags=58%, list=13%, signal=51%
## GO:0120163 3233  tags=80%, list=7%, signal=74%
## GO:0002526 7717 tags=53%, list=17%, signal=44%
## GO:0043618 6179 tags=64%, list=14%, signal=55%
## GO:0048806 4836 tags=46%, list=11%, signal=41%
## GO:0035567 9028 tags=62%, list=20%, signal=49%
##                                                                                                                        core_enrichment
## GO:0048701                                                       100156847/100101926/100519993/100153257/100113425/100153098/100153985
## GO:0120163                                                448845/100240743/100515411/100520233/100518262/100512786/100518646/100127171
## GO:0002526 100151836/654407/397094/396663/397051/396677/396842/396927/397031/399500/100158098/397620/100514823/100144442/396799/397061
## GO:0043618                                                       100157127/100520726/100517567/100240743/100511709/100518262/100516343
## GO:0048806                                                                       100515870/397338/100152741/397582/100627056/100518646
## GO:0035567                                             100521912/100524063/100524544/100153078/100155208/100153445/100525170/100627056
  1. KEGG enrichment

The KEGG code of a specific organism can be found at https://rest.kegg.jp/list/organism

tb = gseKEGG(geneList = scores, organism = "ssc", pvalueCutoff = 1)
head(tb)
##                ID                              Description setSize
## ssc04612 ssc04612      Antigen processing and presentation      63
## ssc00260 ssc00260 Glycine, serine and threonine metabolism      42
## ssc00010 ssc00010             Glycolysis / Gluconeogenesis      62
## ssc04216 ssc04216                              Ferroptosis      42
## ssc04640 ssc04640               Hematopoietic cell lineage      89
## ssc04610 ssc04610      Complement and coagulation cascades      80
##          enrichmentScore       NES      pvalue  p.adjust    qvalue  rank
## ssc04612       0.4035308  1.648821 0.002325185 0.5059051 0.5038324 10374
## ssc00260       0.4195898  1.572157 0.009737311 0.5059051 0.5038324  7747
## ssc00010       0.3834772  1.561153 0.006545905 0.5059051 0.5038324  8096
## ssc04216       0.4099693  1.536110 0.013487920 0.5059051 0.5038324 11391
## ssc04640       0.3539926  1.530034 0.004062188 0.5059051 0.5038324  9881
## ssc04610      -0.3422645 -1.470321 0.012003394 0.5059051 0.5038324  8473
##                            leading_edge
## ssc04612 tags=40%, list=23%, signal=31%
## ssc00260 tags=33%, list=17%, signal=28%
## ssc00010 tags=34%, list=18%, signal=28%
## ssc04216 tags=45%, list=25%, signal=34%
## ssc04640 tags=39%, list=22%, signal=31%
## ssc04610 tags=30%, list=19%, signal=24%
##                                                                                                                                                                                                                                                                                                                    core_enrichment
## ssc04612                                                                                         733649/100135040/100153090/396636/100037921/100519184/100156204/397028/100621559/397572/100517711/100622915/100525647/100174943/100157996/397625/397086/100135031/100620407/445528/100135029/100135049/100153386/396742/100515919
## ssc00260                                                                                                                                                                                                 414424/397129/100513890/100155425/100520329/110255953/100151982/397134/106504110/397444/397371/100154776/100515508/733654
## ssc00010                                                                                                                              397129/102161427/100512795/100502559/100522421/396924/397566/397602/100739347/106504110/100517097/396673/733685/100623786/100522261/100154776/100621757/100158218/100134959/733601/100154828
## ssc04216                                                                                                                                                  100158001/100739102/100462749/397645/448982/396971/397108/100170117/397651/100627653/100157521/100623148/399537/100522126/100737517/733595/100322893/100620912/100524695
## ssc04640 100135040/110261056/396636/100037921/100517086/100511343/613130/494013/397554/100038007/100628112/100517053/100511536/100038008/100157996/100512142/100525591/396785/397459/397086/100037942/100521616/100620339/100524166/397160/100523640/397122/396781/445528/100622336/100135049/397669/100153386/100518694/100521477
## ssc04610                                                                                                                  100302537/445464/396568/403164/100157642/396677/733660/100155068/396945/100521017/448981/100037953/100152125/100153513/100525254/100144442/414437/492276/100521609/397347/654289/100144304/397274/397121
  1. MSigDB

Use msigdbr::msigdbr_species() to see which organisms are supported.

gene_sets = msigdbr(species = "pig", category = "H")
map = gene_sets[, c("gs_name", "entrez_gene")]
map$entrez_gene = as.character(map$entrez_gene)

tb = GSEA(geneList = scores, TERM2GENE = map, pvalueCutoff = 1)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
head(tb)
##                                                                    ID
## HALLMARK_APICAL_JUNCTION                     HALLMARK_APICAL_JUNCTION
## HALLMARK_ANGIOGENESIS                           HALLMARK_ANGIOGENESIS
## HALLMARK_ALLOGRAFT_REJECTION             HALLMARK_ALLOGRAFT_REJECTION
## HALLMARK_G2M_CHECKPOINT                       HALLMARK_G2M_CHECKPOINT
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE HALLMARK_UNFOLDED_PROTEIN_RESPONSE
## HALLMARK_OXIDATIVE_PHOSPHORYLATION HALLMARK_OXIDATIVE_PHOSPHORYLATION
##                                                           Description setSize
## HALLMARK_APICAL_JUNCTION                     HALLMARK_APICAL_JUNCTION     192
## HALLMARK_ANGIOGENESIS                           HALLMARK_ANGIOGENESIS      34
## HALLMARK_ALLOGRAFT_REJECTION             HALLMARK_ALLOGRAFT_REJECTION     198
## HALLMARK_G2M_CHECKPOINT                       HALLMARK_G2M_CHECKPOINT     197
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE HALLMARK_UNFOLDED_PROTEIN_RESPONSE     112
## HALLMARK_OXIDATIVE_PHOSPHORYLATION HALLMARK_OXIDATIVE_PHOSPHORYLATION     191
##                                    enrichmentScore       NES     pvalue
## HALLMARK_APICAL_JUNCTION                 0.2977763  1.492434 0.00268864
## HALLMARK_ANGIOGENESIS                   -0.3693229 -1.328068 0.10020877
## HALLMARK_ALLOGRAFT_REJECTION             0.2621035  1.318685 0.03458551
## HALLMARK_G2M_CHECKPOINT                 -0.2537393 -1.281752 0.05241090
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE       0.2747502  1.263752 0.07047619
## HALLMARK_OXIDATIVE_PHOSPHORYLATION       0.2367202  1.184313 0.10112360
##                                     p.adjust    qvalue  rank
## HALLMARK_APICAL_JUNCTION           0.1344320 0.1344320  9499
## HALLMARK_ANGIOGENESIS              0.8426966 0.8426966 12955
## HALLMARK_ALLOGRAFT_REJECTION       0.8426966 0.8426966  9596
## HALLMARK_G2M_CHECKPOINT            0.8426966 0.8426966  7227
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE 0.8426966 0.8426966  7640
## HALLMARK_OXIDATIVE_PHOSPHORYLATION 0.8426966 0.8426966  7432
##                                                      leading_edge
## HALLMARK_APICAL_JUNCTION           tags=30%, list=21%, signal=24%
## HALLMARK_ANGIOGENESIS              tags=56%, list=29%, signal=40%
## HALLMARK_ALLOGRAFT_REJECTION       tags=27%, list=21%, signal=22%
## HALLMARK_G2M_CHECKPOINT            tags=23%, list=16%, signal=20%
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE tags=22%, list=17%, signal=19%
## HALLMARK_OXIDATIVE_PHOSPHORYLATION tags=20%, list=16%, signal=17%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             core_enrichment
## HALLMARK_APICAL_JUNCTION           100240725/100169931/100156321/100302018/100152349/100152497/100157139/100519931/397592/100155841/100049688/100137084/100513941/100153483/100523778/100155319/100513456/110261011/397578/100157760/100125967/100517053/397019/494458/100038012/100622228/100517284/100127479/654325/100511642/100522631/100622559/100152635/100525579/574058/100153175/414396/100626921/100511937/100144541/396904/100626294/100515669/100518152/100153553/100152572/397653/397160/110260605/399688/100157406/100627579/100518044/100286741/100302020/100518997/100523599
## HALLMARK_ANGIOGENESIS                                                                                                                                                                                                                                                                                                                                                                                              397087/396724/100519764/396862/397537/100302364/100522100/100513161/100152401/396900/397328/100157642/100152001/100156358/100156773/100153513/397663/100522743/110256417
## HALLMARK_ALLOGRAFT_REJECTION                                                                100524908/100037937/733649/100135040/100516374/641356/100525313/100153090/100511843/110261056/100738545/396636/397161/100523880/780419/100233184/595126/494013/399503/100038007/397490/100524881/100518254/100525399/100037289/100522293/100622915/100738420/100519653/780426/654325/100157996/397078/100739325/100522631/397282/100141419/100233188/397086/100517070/100521992/100512835/397252/396595/397122/100518074/100622679/445528/100155581/397230/100524265/100620198/100135049/733681
## HALLMARK_G2M_CHECKPOINT                                                                                                              100192318/100623751/100157287/100518101/396953/100512139/100524544/100512350/100526080/100517044/100155762/100151862/100522031/100519078/100153197/100154407/100520403/100156735/414408/100511890/768108/100519111/110261143/100192446/100626221/100515361/102158362/100515360/106504097/100517651/100737380/100522198/100511709/100628125/397015/100515063/397218/110255643/100513721/100152722/100513079/768117/100518243/100525546/106509022/397083
## HALLMARK_UNFOLDED_PROTEIN_RESPONSE                                                                                                                                                                                                                                                                                                   100156321/100525313/100101926/100155472/106504051/100522201/100518582/100620746/100516004/100739082/100521183/100522185/100523065/100738612/100514523/100101928/100522827/100620358/100526140/100217386/100525048/100526157/100521064/397270/100135675
## HALLMARK_OXIDATIVE_PHOSPHORYLATION                                                                                                                                                                                     100156865/397129/102161427/100524239/100524622/503544/100523751/100627838/100525869/100516748/100524598/100513895/100038000/100518959/100521183/100512831/100524459/100515636/574054/397602/414412/397603/397651/396614/110259325/100155047/399537/399540/100512225/100312959/100737214/102159422/100037989/102159115/100154143/100624335/100048954/100512624/396968

Examples for less well-studied organisms

When an OrgDb object is avaiable

Taking dolphin as an example. Since org_db is an OrgDb object, we can directly use gseGO():

library(AnnotationHub)
## Loading required package: BiocFileCache
## Loading required package: dbplyr
## 
## Attaching package: 'AnnotationHub'
## The following object is masked from 'package:Biobase':
## 
##     cache
ah = AnnotationHub()
org_db = ah[["AH112418"]]
## loading from cache
all_genes = keys(org_db, keytype = "ENTREZID")
scores = sort(rnorm(length(all_genes)), decreasing = TRUE)
names(scores) = sample(all_genes)

tb = gseGO(geneList = scores, ont = "BP", OrgDb = org_db, pvalueCutoff = 1)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
head(tb)
##                    ID                              Description setSize
## GO:0046890 GO:0046890 regulation of lipid biosynthetic process      20
## GO:0019216 GO:0019216    regulation of lipid metabolic process      24
## GO:0006629 GO:0006629                  lipid metabolic process     220
## GO:0044282 GO:0044282         small molecule catabolic process      51
## GO:0006304 GO:0006304                         DNA modification      11
## GO:0018022 GO:0018022              peptidyl-lysine methylation      14
##            enrichmentScore       NES       pvalue  p.adjust    qvalue rank
## GO:0046890       0.6177556  1.918075 0.0012748463 0.5945033 0.5945033 5280
## GO:0019216       0.5691529  1.864970 0.0010875849 0.5945033 0.5945033 5280
## GO:0006629       0.2995319  1.509209 0.0009435904 0.5945033 0.5945033 5617
## GO:0044282       0.4490764  1.744340 0.0017502563 0.6121522 0.6121522 6917
## GO:0006304      -0.7007103 -1.838537 0.0048693207 0.7135441 0.7135441 4054
## GO:0018022       0.6509586  1.821876 0.0038247345 0.7135441 0.7135441 1209
##                              leading_edge
## GO:0046890 tags=50%, list=18%, signal=41%
## GO:0019216 tags=42%, list=18%, signal=34%
## GO:0006629 tags=29%, list=19%, signal=24%
## GO:0044282 tags=45%, list=24%, signal=34%
## GO:0006304 tags=45%, list=14%, signal=39%
## GO:0018022  tags=29%, list=4%, signal=27%
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            core_enrichment
## GO:0046890                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             101337436/101334918/101338957/101327641/101320643/101332733/101331374/101321296/101327404/101336602
## GO:0019216                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             101337436/101334918/101338957/101327641/101320643/101332733/101331374/101321296/101327404/101336602
## GO:0006629 101340185/101337436/101333914/101336885/101316588/101324813/101331526/101337016/101315563/101322385/101328759/109550921/101334918/101334078/101338957/101322780/101318950/101318974/101321262/101331134/101326769/101335884/101321281/101337549/101327283/101317587/101323947/101337343/101322610/101325554/101327641/101320643/101335784/101326990/101325541/101316806/117307480/101333670/101339921/101316732/101327166/101315621/101332733/101339526/101325780/101331374/101321305/101321296/101331941/101333502/101315844/101323516/101329447/101327404/101316275/101323737/101338285/101321997/101336602/101325438/101322704/101321880/101326032/101333427
## GO:0044282                                                                                                                                                                                                                                                                                                                                                                                                                           109552917/101338869/101325919/101330067/101322385/101334078/101336987/101320210/101339921/101339526/101325809/101319468/101329447/101332575/101324776/101317055/101319581/101322945/101325191/101336307/101319146/101327172/101324967
## GO:0006304                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               101326146/101320992/101334597/101323636/101331367
## GO:0018022                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         101328050/101327512/101327110/101320953

gseKEGG() also supports many other orgainsms

gseKEGG(geneList = scores, organism = ...)

Manually construct the gene sets

We have introduced many ways to obtain gene sets for less well-studies organisms. The only thing to do here is to convert gene sets to a two-column data frame where gene sets are in the first column and genes are in the second column. Then use GSEA().

GSEA(geneList = scores, TERM2GENE = ...)

Look at the tb object

We used the same variable name tb for the object returned by the various enrichment functions. They are all in the same format. It looks like a table, but be careful, it is actually not:

class(tb)
## [1] "gseaResult"
## attr(,"package")
## [1] "DOSE"
str(tb)
## Formal class 'gseaResult' [package "DOSE"] with 13 slots
##   ..@ result     :'data.frame':  1399 obs. of  11 variables:
##   .. ..$ ID             : chr [1:1399] "GO:0046890" "GO:0019216" "GO:0006629" "GO:0044282" ...
##   .. ..$ Description    : chr [1:1399] "regulation of lipid biosynthetic process" "regulation of lipid metabolic process" "lipid metabolic process" "small molecule catabolic process" ...
##   .. ..$ setSize        : int [1:1399] 20 24 220 51 11 14 22 12 10 13 ...
##   .. ..$ enrichmentScore: num [1:1399] 0.618 0.569 0.3 0.449 -0.701 ...
##   .. ..$ NES            : num [1:1399] 1.92 1.86 1.51 1.74 -1.84 ...
##   .. ..$ pvalue         : num [1:1399] 0.001275 0.001088 0.000944 0.00175 0.004869 ...
##   .. ..$ p.adjust       : num [1:1399] 0.595 0.595 0.595 0.612 0.714 ...
##   .. ..$ qvalue         : num [1:1399] 0.595 0.595 0.595 0.612 0.714 ...
##   .. ..$ rank           : num [1:1399] 5280 5280 5617 6917 4054 ...
##   .. ..$ leading_edge   : chr [1:1399] "tags=50%, list=18%, signal=41%" "tags=42%, list=18%, signal=34%" "tags=29%, list=19%, signal=24%" "tags=45%, list=24%, signal=34%" ...
##   .. ..$ core_enrichment: chr [1:1399] "101337436/101334918/101338957/101327641/101320643/101332733/101331374/101321296/101327404/101336602" "101337436/101334918/101338957/101327641/101320643/101332733/101331374/101321296/101327404/101336602" "101340185/101337436/101333914/101336885/101316588/101324813/101331526/101337016/101315563/101322385/101328759/1"| __truncated__ "109552917/101338869/101325919/101330067/101322385/101334078/101336987/101320210/101339921/101339526/101325809/1"| __truncated__ ...
##   ..@ organism   : chr "Tursiops truncatus"
##   ..@ setType    : chr "BP"
##   ..@ geneSets   :List of 6859
##   .. ..$ GO:0000002: chr [1:5] "101319146" "101322275" "101323809" "101330306" ...
##   .. ..$ GO:0000003: chr [1:87] "101335701" "101315473" "101315779" "101315844" ...
##   .. ..$ GO:0000018: chr [1:8] "101315900" "101319565" "101320166" "101320856" ...
##   .. ..$ GO:0000027: chr "101323418"
##   .. ..$ GO:0000028: chr "101318467"
##   .. ..$ GO:0000038: chr [1:4] "101321997" "101330026" "101334078" "101336885"
##   .. ..$ GO:0000041: chr [1:14] "101321017" "101336438" "101318005" "101320570" ...
##   .. ..$ GO:0000045: chr [1:9] "101316053" "101320545" "101322490" "101331278" ...
##   .. ..$ GO:0000050: chr [1:3] "101316168" "101319666" "101339624"
##   .. ..$ GO:0000054: chr "101332258"
##   .. ..$ GO:0000055: chr "101332258"
##   .. ..$ GO:0000070: chr [1:26] "101315815" "101316537" "101316695" "101318454" ...
##   .. ..$ GO:0000075: chr [1:23] "101316695" "101316733" "101318513" "101318624" ...
##   .. ..$ GO:0000077: chr [1:14] "101316733" "101318513" "101319146" "101320252" ...
##   .. ..$ GO:0000079: chr [1:11] "101324115" "101324169" "101326949" "101327555" ...
##   .. ..$ GO:0000082: chr [1:15] "101317019" "101319146" "101321188" "101322275" ...
##   .. ..$ GO:0000086: chr [1:15] "101316752" "101318513" "101319016" "101319117" ...
##   .. ..$ GO:0000096: chr [1:7] "101318072" "101320414" "101320471" "101320949" ...
##   .. ..$ GO:0000097: chr [1:6] "101320414" "101320471" "101320949" "101331056" ...
##   .. ..$ GO:0000103: chr "101333364"
##   .. ..$ GO:0000122: chr [1:35] "101316020" "101319446" "101320166" "101321309" ...
##   .. ..$ GO:0000132: chr [1:4] "101321450" "101328452" "101330245" "101338521"
##   .. ..$ GO:0000154: chr [1:3] "101318650" "101324394" "101335796"
##   .. ..$ GO:0000165: chr [1:48] "101315703" "101315844" "101316074" "101316105" ...
##   .. ..$ GO:0000183: chr [1:2] "101320305" "101323967"
##   .. ..$ GO:0000184: chr [1:12] "101317208" "101320986" "101322520" "101324720" ...
##   .. ..$ GO:0000209: chr [1:33] "101315684" "101316554" "101319013" "101319502" ...
##   .. ..$ GO:0000212: chr "101326437"
##   .. ..$ GO:0000226: chr [1:61] "101315537" "101316394" "101316518" "101317000" ...
##   .. ..$ GO:0000244: chr "101334043"
##   .. ..$ GO:0000245: chr [1:9] "101321273" "101322182" "101327231" "101330237" ...
##   .. ..$ GO:0000255: chr "101338869"
##   .. ..$ GO:0000266: chr [1:9] "101319797" "101320522" "101322601" "101323172" ...
##   .. ..$ GO:0000270: chr "101327680"
##   .. ..$ GO:0000271: chr [1:9] "101323622" "101324350" "101329904" "101331306" ...
##   .. ..$ GO:0000272: chr "101330457"
##   .. ..$ GO:0000278: chr [1:95] "101315815" "101316323" "101316500" "101316537" ...
##   .. ..$ GO:0000280: chr [1:45] "101315815" "101316537" "101316695" "101316904" ...
##   .. ..$ GO:0000281: chr [1:8] "101318454" "101318926" "101329144" "101331655" ...
##   .. ..$ GO:0000288: chr [1:15] "101316306" "101322274" "101324369" "101325801" ...
##   .. ..$ GO:0000289: chr [1:4] "101322274" "101324369" "101325801" "101337967"
##   .. ..$ GO:0000290: chr [1:5] "101322274" "101329036" "101331769" "101332555" ...
##   .. ..$ GO:0000291: chr [1:4] "101322274" "101325801" "101326312" "101332993"
##   .. ..$ GO:0000301: chr "101335576"
##   .. ..$ GO:0000302: chr [1:15] "101316954" "101319583" "101321208" "101322317" ...
##   .. ..$ GO:0000303: chr [1:2] "101325383" "101327438"
##   .. ..$ GO:0000305: chr [1:2] "101325383" "101327438"
##   .. ..$ GO:0000320: chr "101317123"
##   .. ..$ GO:0000338: chr [1:3] "101320449" "101328620" "101337481"
##   .. ..$ GO:0000375: chr [1:74] "101315667" "101316328" "101316554" "101317380" ...
##   .. ..$ GO:0000377: chr [1:74] "101315667" "101316328" "101316554" "101317380" ...
##   .. ..$ GO:0000380: chr [1:6] "101317431" "101324720" "101325768" "101328568" ...
##   .. ..$ GO:0000381: chr [1:5] "101324720" "101325768" "101328568" "101333925" ...
##   .. ..$ GO:0000387: chr [1:11] "101318020" "101318583" "101318698" "101319671" ...
##   .. ..$ GO:0000394: chr [1:5] "101319579" "101327326" "101333559" "101338459" ...
##   .. ..$ GO:0000395: chr "101327231"
##   .. ..$ GO:0000398: chr [1:74] "101315667" "101316328" "101316554" "101317380" ...
##   .. ..$ GO:0000413: chr [1:18] "101316348" "101322428" "101325477" "101325900" ...
##   .. ..$ GO:0000422: chr [1:11] "101316053" "101316255" "101317608" "101319146" ...
##   .. ..$ GO:0000423: chr [1:6] "101316053" "101317608" "101319146" "101325333" ...
##   .. ..$ GO:0000425: chr [1:2] "101322317" "101334078"
##   .. ..$ GO:0000429: chr "101329457"
##   .. ..$ GO:0000430: chr "101329457"
##   .. ..$ GO:0000432: chr "101329457"
##   .. ..$ GO:0000436: chr "101329457"
##   .. ..$ GO:0000459: chr "101339801"
##   .. ..$ GO:0000460: chr [1:2] "101318462" "101339801"
##   .. ..$ GO:0000463: chr [1:3] "101315781" "101320270" "101327032"
##   .. ..$ GO:0000466: chr "101339801"
##   .. ..$ GO:0000467: chr "101339801"
##   .. ..$ GO:0000469: chr "101339801"
##   .. ..$ GO:0000470: chr [1:3] "101315781" "101320270" "101327032"
##   .. ..$ GO:0000491: chr "101320200"
##   .. ..$ GO:0000493: chr "101320200"
##   .. ..$ GO:0000578: chr "101320735"
##   .. ..$ GO:0000711: chr "101330719"
##   .. ..$ GO:0000722: chr "101328782"
##   .. ..$ GO:0000723: chr [1:14] "101315665" "101319304" "101320264" "101324392" ...
##   .. ..$ GO:0000724: chr [1:20] "101315841" "101315900" "101317036" "101319304" ...
##   .. ..$ GO:0000725: chr [1:21] "101315841" "101315900" "101317036" "101319304" ...
##   .. ..$ GO:0000731: chr [1:7] "101319167" "101319446" "101323869" "101324001" ...
##   .. ..$ GO:0000768: chr "109552510"
##   .. ..$ GO:0000819: chr [1:28] "101315815" "101316537" "101316695" "101318454" ...
##   .. ..$ GO:0000902: chr [1:41] "101316391" "101316954" "101317123" "101317226" ...
##   .. ..$ GO:0000904: chr [1:17] "101317123" "101317632" "101318355" "101319110" ...
##   .. ..$ GO:0000910: chr [1:18] "101316518" "101316752" "101318454" "101318926" ...
##   .. ..$ GO:0000912: chr "101324212"
##   .. ..$ GO:0000915: chr "101324212"
##   .. ..$ GO:0000956: chr [1:30] "101316306" "101317208" "101320986" "101322274" ...
##   .. ..$ GO:0000957: chr "101333806"
##   .. ..$ GO:0000958: chr "101333806"
##   .. ..$ GO:0000959: chr [1:5] "101317667" "101319586" "101320594" "101333806" ...
##   .. ..$ GO:0000963: chr [1:2] "101317667" "101319586"
##   .. ..$ GO:0000964: chr "101319586"
##   .. ..$ GO:0000966: chr [1:6] "101319586" "101322154" "101326512" "101327203" ...
##   .. ..$ GO:0001101: chr [1:5] "101316666" "101320291" "101326859" "101328169" ...
##   .. ..$ GO:0001109: chr "101326268"
##   .. ..$ GO:0001111: chr "101326268"
##   .. ..$ GO:0001173: chr "101324154"
##   .. .. [list output truncated]
##   ..@ geneList   : Named num [1:28931] 4.03 3.93 3.9 3.8 3.76 ...
##   .. ..- attr(*, "names")= chr [1:28931] "101322544" "101328796" "101321568" "101319121" ...
##   ..@ keytype    : chr "ENTREZID"
##   ..@ permScores : num[0 , 0 ] 
##   ..@ params     :List of 6
##   .. ..$ pvalueCutoff : num 1
##   .. ..$ eps          : num 1e-10
##   .. ..$ pAdjustMethod: chr "BH"
##   .. ..$ exponent     : num 1
##   .. ..$ minGSSize    : num 10
##   .. ..$ maxGSSize    : num 500
##   ..@ gene2Symbol: chr(0) 
##   ..@ readable   : logi FALSE
##   ..@ termsim    : num[0 , 0 ] 
##   ..@ method     : chr(0) 
##   ..@ dr         : list()

In the ORA analysis by clusterProfiler, we mentioned using tb@result returns the full enrichment table. But this is not the case for the GSEA anlaysis!. In GSEA anlaysis by clusterProfiler, tb@result still returns the significant terms. Add pvalueCutoff = 1 argument if you need the full table.

Practice

Practice 1

The input for e.g. gseGO() is a vector of gene-level scores, thus what metric is used for measuring gene-level differentiation has impact on the GSEA analysis. Here you use the following four gene-level metrics, apply them with gseGO() (GO BP + human) and compare the GSEA results.

Here we use the p53_expr.rds dataset.

lt = readRDS(system.file("extdata", "p53_expr.rds", package = "GSEAtraining"))
expr = lt$expr
condition = lt$condition

First convert gene IDs to EntreZ IDs for expr:

##   gene id might be SYMBOL (p =  0.610 )
## 'select()' returned 1:many mapping between keys and columns

Then calculate the column indicies of WT and MUT

ind_MUT = which(condition == "MUT")
ind_WT = which(condition == "WT")

The four gene-level metrics:

  1. signal-to-noise ratio
  2. t-value
  3. log2 fold change
  4. signed log10 p-values

They can be calculated as:

s2n = apply(mat, 1, function(x) {
    x1 = x[ind_MUT]
    x2 = x[ind_WT]

    (mean(x1) - mean(x2))/(sd(x1) + sd(x2))
})

tvalue = apply(mat, 1, function(x) {
    x1 = x[ind_MUT]
    x2 = x[ind_WT]

    t.test(x1, x2)$statistic
})

log2fc = apply(mat, 1, function(x) {
    x1 = x[ind_MUT]
    x2 = x[ind_WT]

    log2(mean(x1)/mean(x2))
})

logp = apply(mat, 1, function(x) {
    x1 = x[ind_MUT]
    x2 = x[ind_WT]

    t = t.test(x1, x2)
    sign(t$statistic)*(-log10(t$p.value))
})

Now use these four numeric vectors to perform GSEA analysis (on GO BP gene sets).

Solution

First to sort all the numeric vectors:

s2n = sort(s2n, decreasing = TRUE)
tvalue = sort(tvalue, decreasing = TRUE)
log2fc = sort(log2fc, decreasing = TRUE)
logp = sort(logp, decreasing = TRUE)
tb1 = gseGO(geneList = s2n, ont = "BP", OrgDb = org.Hs.eg.db)
## preparing geneSet collections...
## GSEA analysis...
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
tb2 = gseGO(geneList = tvalue, ont = "BP", OrgDb = org.Hs.eg.db)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
tb3 = gseGO(geneList = log2fc, ont = "BP", OrgDb = org.Hs.eg.db)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...
tb4 = gseGO(geneList = logp, ont = "BP", OrgDb = org.Hs.eg.db)
## preparing geneSet collections...
## GSEA analysis...
## leading edge analysis...
## done...

Get the “real data frames” for all terms.

tb1 = tb1@result
tb2 = tb2@result
tb3 = tb3@result
tb4 = tb4@result
library(eulerr)
plot(euler(list("s2n" = tb1$ID[tb1$p.adjust < 0.05],
                "tvalue" = tb2$ID[tb2$p.adjust < 0.05],
                "log2fc" = tb3$ID[tb3$p.adjust < 0.05],
                "logp" = tb4$ID[tb4$p.adjust < 0.05])),
    quantities = TRUE)

par(mfrow = c(2, 2))
plot(s2n)
plot(tvalue)
plot(log2fc)
plot(logp)

Conclusion: do not use log2 fold change for GSEA analysis, and using signal-to-noise or t-value (or other similar statistics) is always a good choice.