A “landscape visualiztion” is very useful for complex omics studies, which give a straightforward impression of the associations between different types of datasets.
In the dataset we used in this document, there are various statistics for whole-genome methylome, and chromatin segmentations from five different histone modifications.
The data is about comparisons of four tumor subtypes (encoded as 1-4), also compared to normal tissues. The DMRs (differentially methylated regions) are the primary data, we associate other genomic information to DMRs (separated as hyper-methylated DMRs and hypo-methylated DMRs).
data = readRDS(system.file("extdata", package = "ComplexHeatmap", "dmr_summary.rds"))
data
## $label
## [1] "hyper1" "hypo1" "hyper2" "hypo2" "hyper3" "hypo3" "hyper4" "hypo4"
##
## $mean_meth
## tumor normal
## hyper1 0.7517288 0.5465373
## hypo1 0.6441198 0.8477274
## hyper2 0.6861487 0.5167563
## hypo2 0.6673283 0.8507612
## hyper3 0.5897328 0.3848513
## hypo3 0.5814106 0.8489631
## hyper4 0.7100953 0.5134402
## hypo4 0.6295212 0.8408403
##
## $n_gr
## hyper1 hypo1 hyper2 hypo2 hyper3 hypo3 hyper4 hypo4
## 94718 245786 45552 364888 17945 748123 57296 409340
##
## $n_corr
## neg pos
## hyper1 0.18088431 0.13939272
## hypo1 0.10038407 0.28711562
## hyper2 0.20567703 0.09652705
## hypo2 0.04831620 0.34556905
## hyper3 0.22134299 0.11016996
## hypo3 0.12167384 0.26618350
## hyper4 0.23479824 0.11372522
## hypo4 0.07356721 0.30250403
##
## $dist_tss
## <1kb 1kb~5kb 5kb~10kb >10kb
## hyper1 0.125372157 0.13866425 0.08865263 0.6473110
## hypo1 0.009947678 0.03284565 0.04104790 0.9161588
## hyper2 0.094968388 0.14800667 0.09617580 0.6608491
## hypo2 0.009841376 0.03039289 0.03744163 0.9223241
## hyper3 0.163443856 0.18127612 0.10638061 0.5488994
## hypo3 0.011925846 0.05011208 0.05455253 0.8834095
## hyper4 0.134948338 0.15337894 0.09471865 0.6169541
## hypo4 0.010165144 0.03388870 0.04205062 0.9138955
##
## $gene_anno
## gene intergenic
## hyper1 0.7544481 0.2455519
## hypo1 0.2614090 0.7385910
## hyper2 0.8071114 0.1928886
## hypo2 0.2766842 0.7233158
## hyper3 0.7429300 0.2570700
## hypo3 0.3378101 0.6621899
## hyper4 0.7759629 0.2240371
## hypo4 0.3063298 0.6936702
##
## $cgi_anno
## cgi cgi-shore
## hyper1 0.0687877232 0.16460999
## hypo1 0.0007826092 0.01542723
## hyper2 0.0781542960 0.15618758
## hypo2 0.0005570060 0.01512957
## hyper3 0.2222821610 0.25524956
## hypo3 0.0012653130 0.02689185
## hyper4 0.1082902246 0.18727782
## hypo4 0.0006914389 0.01744055
##
## $mat_enrich_gf
## gene tss exon intergenic cgi cgi_shore TFBS LINE SINE
## hyper1 186.629651 39.375569 79.80908 -73.49671 73.40778 125.59968 110.03336 -40.10956 -11.067604
## hypo1 -78.814823 -11.518447 -25.05086 340.09976 -12.06797 -44.24508 14.77201 145.85499 55.391206
## hyper2 149.194948 17.059861 70.63897 -72.80415 49.95050 81.97806 96.32553 -28.25040 -13.818354
## hypo2 -78.987382 -8.083880 -31.15724 403.09614 -16.52595 -49.81784 26.04397 148.18404 54.747895
## hyper3 68.919217 18.387836 35.63760 -38.44248 97.23184 86.31879 49.67611 -18.88702 -16.758948
## hypo3 2.863683 0.582912 6.44983 667.14849 -18.12583 -23.49314 79.76049 174.75181 265.447670
## hyper4 145.654735 31.279502 72.31768 -72.77763 78.96283 115.41691 88.58372 -30.08615 -7.177447
## hypo4 -50.302007 -8.544744 -23.12303 395.21771 -16.82151 -44.33664 51.62248 142.26759 67.569356
##
## $mat_pct_st
## TssActive Transcript Enhancer Heterochromatin TssBiv Repressive Quies
## hyper1 0.079208648 0.39080543 0.13935249 0.07361768 0.0261148890 0.1134716 0.17742925
## hypo1 0.008036606 0.01122827 0.01529269 0.09036155 0.0003173593 0.3035827 0.57118087
## hyper2 0.091259523 0.49649222 0.12661900 0.02247488 0.0288367133 0.1175651 0.11675255
## hypo2 0.010212708 0.03349391 0.02232999 0.05548943 0.0003742431 0.3602676 0.51783215
## hyper3 0.103545135 0.31006881 0.20371315 0.08197413 0.0694082448 0.1686201 0.06267044
## hypo3 0.006652799 0.08605831 0.02815086 0.06896749 0.0008910815 0.2976250 0.51165449
## hyper4 0.080303091 0.40841489 0.16189657 0.04445684 0.0472732240 0.1384810 0.11917443
## hypo4 0.010011274 0.01830283 0.02770096 0.05767671 0.0005919990 0.3630858 0.52263038
##
## $mat_enrich_st
## TssActive Transcript Enhancer Heterochromatin TssBiv Repressive Quies
## hyper1 99.33523 156.45137 182.919376 -11.417893 72.809429 -36.14734 -71.86820
## hypo1 -22.39477 -167.82775 -32.255096 57.060250 -11.155807 254.64556 207.90120
## hyper2 73.04860 113.41170 119.773261 -16.601260 50.464608 -36.81548 -63.40034
## hypo2 -24.38352 -188.63585 -5.324457 69.707457 -12.311633 338.06979 229.73787
## hyper3 46.96870 42.00094 108.135501 -4.813011 48.455702 -15.56497 -46.21148
## hypo3 -51.04874 -169.02679 -13.513567 91.848153 -6.826916 434.41616 415.30218
## hyper4 94.04493 121.64155 186.858769 -18.517708 75.716017 -46.01805 -78.62866
## hypo4 -34.41140 -194.93498 -10.730393 39.105634 -12.177558 389.63095 247.53327
The following objects (mainly matrices) will be used for visualization:
mean_meth: Mean methylation in the genome, in both
tumor samples and normal samples.n_gr: Number of DMRs.n_corr: Fraction of DMRs that show significant
correlation to expression of their nearest genes.dist_tss: Average distance to the nearest TSS.gene_anno: Fraction of DMRs that overlap to genes or
intergenic regions.cgi_anno: Fraction of DMRs that overlap to CGI (CpG
islands).mat_enrich_gf: Enrichment to various genomic features,
based on a permutation-based method.mat_pct_st: Fraction of DMRs that overlap to genomic
segments in different chromatin states.mat_enrich_st: Enrichment to various chromatin
segmentations, based on a permutation-based method.For simplicity, we attach data to the search list so
that we do not need to type data$ for using its
elements.
attach(data)
We will create a visualization that links these nine different meaurements.
As they are either numeric vectors or matrices, let’s first plot each of them separately.
We use heatmap for mean methylation.
library(circlize)
library(ComplexHeatmap)
meth_col_fun = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))
Heatmap(mean_meth, col = meth_col_fun,
cluster_rows = FALSE, cluster_columns = FALSE)
Barplot for the number of DMRs.
rowAnnotation(n_gr = anno_barplot(n_gr), width = unit(4, "cm")) + NULL
Stacked barplot for the n_corr.
rowAnnotation(n_corr = anno_barplot(n_corr), width = unit(4, "cm")) + NULL
Stacked barplot for the dist_tss.
rowAnnotation(dist_tss = anno_barplot(dist_tss), width = unit(4, "cm")) + NULL
Stacked barplot for the gene_anno.
rowAnnotation(gene_anno = anno_barplot(gene_anno), width = unit(4, "cm")) + NULL
Stacked barplot for the cgi_anno.
rowAnnotation(cgi_anno = anno_barplot(cgi_anno), width = unit(4, "cm")) + NULL
Heatmap for mat_enrich_gf.
Heatmap(mat_enrich_gf,
cluster_rows = FALSE, cluster_columns = FALSE)
Stacked barplot for the mat_pct_st.
rowAnnotation(mat_pct_st = anno_barplot(mat_pct_st), width = unit(4, "cm")) + NULL
Heatmap for mat_enrich_st.
Heatmap(mat_enrich_st,
cluster_rows = FALSE, cluster_columns = FALSE)
Since these the nine objects have the same row orders (i.e. rows, or elements, are already corresponded), we can simply concatenate all the nine heatmaps and barplot annotations to build a comprehensive visualization:
Heatmap(mean_meth, col = meth_col_fun,
cluster_rows = FALSE, cluster_columns = FALSE) +
rowAnnotation(n_gr = anno_barplot(n_gr), width = unit(2, "cm")) +
rowAnnotation(n_corr = anno_barplot(n_corr), width = unit(2, "cm")) +
rowAnnotation(dist_tss = anno_barplot(dist_tss), width = unit(2, "cm")) +
rowAnnotation(gene_anno = anno_barplot(gene_anno), width = unit(2, "cm")) +
rowAnnotation(cgi_anno = anno_barplot(cgi_anno), width = unit(2, "cm")) +
Heatmap(mat_enrich_gf,
cluster_rows = FALSE, cluster_columns = FALSE) +
rowAnnotation(mat_pct_st = anno_barplot(mat_pct_st), width = unit(2, "cm")) +
Heatmap(mat_enrich_st,
cluster_rows = FALSE, cluster_columns = FALSE)
Maybe split the rows by hyper and hypo can emphasize the difference of these two groups of DMRs.
row_split = gsub("\\d+$", "", rownames(mean_meth))
Heatmap(mean_meth, col = meth_col_fun,
cluster_rows = FALSE, cluster_columns = FALSE,
row_split = row_split) +
rowAnnotation(n_gr = anno_barplot(n_gr), width = unit(2, "cm")) +
rowAnnotation(n_corr = anno_barplot(n_corr), width = unit(2, "cm")) +
rowAnnotation(dist_tss = anno_barplot(dist_tss), width = unit(2, "cm")) +
rowAnnotation(gene_anno = anno_barplot(gene_anno), width = unit(2, "cm")) +
rowAnnotation(cgi_anno = anno_barplot(cgi_anno), width = unit(2, "cm")) +
Heatmap(mat_enrich_gf,
cluster_rows = FALSE, cluster_columns = FALSE) +
rowAnnotation(mat_pct_st = anno_barplot(mat_pct_st), width = unit(2, "cm")) +
Heatmap(mat_enrich_st,
cluster_rows = FALSE, cluster_columns = FALSE)
That is the basic structure of the visualization. We can do some customizations, especially the colors for different components in the heatmap list.
corr_col = c("green", "red")
dist_tss_col = c("#FF0000", "#FF7352", "#FFB299", "#FFD9CB")
gene_anno_col = c("green", "blue")
cgi_anno_col = c("#FFA500", "#FFD191")
z_score_col_fun = colorRamp2(c(-200, 0, 200), c("green", "white", "red"))
state_col = c("#FF0000", "#008000", "#C2E105", "#8A91D0", "#CD5C5C", "#808080", "#000000")
anno_width = unit(3.4, "cm")
ht_list = rowAnnotation(text = anno_text(label, location = unit(1, "npc"), just = "right",
gp = gpar(fontsize = 12)))
ht_list = ht_list + Heatmap(mean_meth, name = "mean_meth", col = meth_col_fun,
cluster_rows = FALSE, row_title = NULL, cluster_columns = FALSE, show_row_names = FALSE, column_names_rot = 45,
column_names_gp = gpar(fontsize = 9),
heatmap_legend_param = list(title = "Methylation", direction = "horizontal", legend_width = unit(3, "cm")),
width = ncol(mean_meth)*unit(4, "mm")) +
rowAnnotation("n_gr" = anno_barplot(n_gr, bar_width = 1, width = anno_width),
show_annotation_name = FALSE) +
rowAnnotation("n_corr" = anno_barplot(n_corr, bar_width = 1, gp = gpar(fill = corr_col),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation("dist_tss" = anno_barplot(dist_tss, bar_width = 1, gp = gpar(fill = dist_tss_col),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation("gene_anno" = anno_barplot(gene_anno, bar_width = 1, gp = gpar(fill = gene_anno_col),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation("cgi_anno" = anno_barplot(cgi_anno, bar_width = 1, gp = gpar(fill = cgi_anno_col),
width = anno_width), show_annotation_name = FALSE) +
Heatmap(mat_enrich_gf, name = "enrich_gf", col = z_score_col_fun, cluster_columns = FALSE,
width = unit(ncol(mat_enrich_gf)*4, "mm"), column_title = "", column_names_rot = 45,
column_names_gp = gpar(fontsize = 9), show_heatmap_legend = FALSE) +
rowAnnotation("pct_st" = anno_barplot(mat_pct_st, bar_width = 1, gp = gpar(fill = state_col),
width = anno_width), show_annotation_name = FALSE) +
Heatmap(mat_enrich_st, name = "enrich_st", col = z_score_col_fun, cluster_columns = FALSE,
width = unit(ncol(mat_enrich_st)*4, "mm"), column_title = "", show_heatmap_legend = FALSE,
column_names_gp = gpar(col = state_col, fontsize = 9), show_row_names = FALSE, column_names_rot = 45)
lgd_list = list(
Legend(labels = c("negative", "positive"), title = "Correlation",
legend_gp = gpar(fill = c("green", "red"))),
Legend(labels = c("gene", "intergenic"), title = "Gene annotation",
legend_gp = gpar(fill = gene_anno_col)),
Legend(labels = c("<1kb", "1kb~5kb", "5kb~10kb", ">10kb"), title = "Distance to TSS",
legend_gp = gpar(fill = dist_tss_col), nrow = 2),
Legend(labels = c("CGI", "CGI shore"), title = "CGI annotation",
legend_gp = gpar(fill = cgi_anno_col)),
Legend(col_fun = z_score_col_fun, title = "Z-score",direction = "horizontal",
at = c(-200, 0, 200),
legend_width = unit(3, "cm")),
Legend(labels = colnames(mat_enrich_st), title = "Chromatin states",
legend_gp = gpar(fill = state_col), nrow = 2)
)
draw(ht_list, padding = unit(c(2, 2, 16, 2), "mm"), row_split = row_split,
heatmap_legend_list = lgd_list, heatmap_legend_side = "bottom")
anno_title = c("n_gr" = "Number of\nDMRs", "n_corr" = "Significantly\ncorrelated genes",
"gene_anno" = "Gene annotation", "dist_tss" = "Distance to TSS",
"cgi_anno" = "CGI annotation", "pct_st" = "Overlap to\nChromatin states")
for(an in names(anno_title)) {
decorate_annotation(an, {
grid.text(anno_title[an], y = unit(1, "npc") + unit(3, "mm"), just = "bottom")
})
}
ht_title = c("mean_meth" = "Mean\nmethylation", "enrich_gf" = "Enrichment to\ngenomic features",
"enrich_st" = "Enrichment to\nchromatin states")
for(an in names(ht_title)) {
decorate_heatmap_body(an, {
grid.text(ht_title[an], y = unit(1, "npc") + unit(3, "mm"), just = "bottom")
})
}