Landscape Visualization

Analysis

In the dataset we used in this document, there are various statistics for whole-genome methylome, and chromatin segmentations from five different histone modifications.

The data compares four tumor subtypes (encoded as 1-4) with normal tissues. The DMRs (differentially methylated regions) are the primary data, and we associate other genomic information with them. DMRs are separated into hyper-methylated DMRs and hypo-methylated DMRs.

# Locate the DMR summary file shipped in the ComplexHeatmap package, load it, and print it.
rds_path <- system.file("extdata", "dmr_summary.rds", package = "ComplexHeatmap")
data <- readRDS(rds_path)
data

## $label
## [1] "hyper1" "hypo1"  "hyper2" "hypo2"  "hyper3" "hypo3"  "hyper4" "hypo4" 
## 
## $mean_meth
##            tumor    normal
## hyper1 0.7517288 0.5465373
## hypo1  0.6441198 0.8477274
## hyper2 0.6861487 0.5167563
## hypo2  0.6673283 0.8507612
## hyper3 0.5897328 0.3848513
## hypo3  0.5814106 0.8489631
## hyper4 0.7100953 0.5134402
## hypo4  0.6295212 0.8408403
## 
## $n_gr
## hyper1  hypo1 hyper2  hypo2 hyper3  hypo3 hyper4  hypo4 
##  94718 245786  45552 364888  17945 748123  57296 409340 
## 
## $n_corr
##               neg        pos
## hyper1 0.18088431 0.13939272
## hypo1  0.10038407 0.28711562
## hyper2 0.20567703 0.09652705
## hypo2  0.04831620 0.34556905
## hyper3 0.22134299 0.11016996
## hypo3  0.12167384 0.26618350
## hyper4 0.23479824 0.11372522
## hypo4  0.07356721 0.30250403
## 
## $dist_tss
##               <1kb    1kb~5kb   5kb~10kb     >10kb
## hyper1 0.125372157 0.13866425 0.08865263 0.6473110
## hypo1  0.009947678 0.03284565 0.04104790 0.9161588
## hyper2 0.094968388 0.14800667 0.09617580 0.6608491
## hypo2  0.009841376 0.03039289 0.03744163 0.9223241
## hyper3 0.163443856 0.18127612 0.10638061 0.5488994
## hypo3  0.011925846 0.05011208 0.05455253 0.8834095
## hyper4 0.134948338 0.15337894 0.09471865 0.6169541
## hypo4  0.010165144 0.03388870 0.04205062 0.9138955
## 
## $gene_anno
##             gene intergenic
## hyper1 0.7544481  0.2455519
## hypo1  0.2614090  0.7385910
## hyper2 0.8071114  0.1928886
## hypo2  0.2766842  0.7233158
## hyper3 0.7429300  0.2570700
## hypo3  0.3378101  0.6621899
## hyper4 0.7759629  0.2240371
## hypo4  0.3063298  0.6936702
## 
## $cgi_anno
##                 cgi  cgi-shore
## hyper1 0.0687877232 0.16460999
## hypo1  0.0007826092 0.01542723
## hyper2 0.0781542960 0.15618758
## hypo2  0.0005570060 0.01512957
## hyper3 0.2222821610 0.25524956
## hypo3  0.0012653130 0.02689185
## hyper4 0.1082902246 0.18727782
## hypo4  0.0006914389 0.01744055
## 
## $mat_enrich_gf
##              gene        tss      exon intergenic       cgi cgi_shore      TFBS      LINE       SINE
## hyper1 186.629651  39.375569  79.80908  -73.49671  73.40778 125.59968 110.03336 -40.10956 -11.067604
## hypo1  -78.814823 -11.518447 -25.05086  340.09976 -12.06797 -44.24508  14.77201 145.85499  55.391206
## hyper2 149.194948  17.059861  70.63897  -72.80415  49.95050  81.97806  96.32553 -28.25040 -13.818354
## hypo2  -78.987382  -8.083880 -31.15724  403.09614 -16.52595 -49.81784  26.04397 148.18404  54.747895
## hyper3  68.919217  18.387836  35.63760  -38.44248  97.23184  86.31879  49.67611 -18.88702 -16.758948
## hypo3    2.863683   0.582912   6.44983  667.14849 -18.12583 -23.49314  79.76049 174.75181 265.447670
## hyper4 145.654735  31.279502  72.31768  -72.77763  78.96283 115.41691  88.58372 -30.08615  -7.177447
## hypo4  -50.302007  -8.544744 -23.12303  395.21771 -16.82151 -44.33664  51.62248 142.26759  67.569356
## 
## $mat_pct_st
##          TssActive Transcript   Enhancer Heterochromatin       TssBiv Repressive      Quies
## hyper1 0.079208648 0.39080543 0.13935249      0.07361768 0.0261148890  0.1134716 0.17742925
## hypo1  0.008036606 0.01122827 0.01529269      0.09036155 0.0003173593  0.3035827 0.57118087
## hyper2 0.091259523 0.49649222 0.12661900      0.02247488 0.0288367133  0.1175651 0.11675255
## hypo2  0.010212708 0.03349391 0.02232999      0.05548943 0.0003742431  0.3602676 0.51783215
## hyper3 0.103545135 0.31006881 0.20371315      0.08197413 0.0694082448  0.1686201 0.06267044
## hypo3  0.006652799 0.08605831 0.02815086      0.06896749 0.0008910815  0.2976250 0.51165449
## hyper4 0.080303091 0.40841489 0.16189657      0.04445684 0.0472732240  0.1384810 0.11917443
## hypo4  0.010011274 0.01830283 0.02770096      0.05767671 0.0005919990  0.3630858 0.52263038
## 
## $mat_enrich_st
##        TssActive Transcript   Enhancer Heterochromatin     TssBiv Repressive     Quies
## hyper1  99.33523  156.45137 182.919376      -11.417893  72.809429  -36.14734 -71.86820
## hypo1  -22.39477 -167.82775 -32.255096       57.060250 -11.155807  254.64556 207.90120
## hyper2  73.04860  113.41170 119.773261      -16.601260  50.464608  -36.81548 -63.40034
## hypo2  -24.38352 -188.63585  -5.324457       69.707457 -12.311633  338.06979 229.73787
## hyper3  46.96870   42.00094 108.135501       -4.813011  48.455702  -15.56497 -46.21148
## hypo3  -51.04874 -169.02679 -13.513567       91.848153  -6.826916  434.41616 415.30218
## hyper4  94.04493  121.64155 186.858769      -18.517708  75.716017  -46.01805 -78.62866
## hypo4  -34.41140 -194.93498 -10.730393       39.105634 -12.177558  389.63095 247.53327

The following objects (mainly matrices) will be used for visualization:

mean_meth: Mean methylation in the genome, in both tumor samples and normal samples.
n_gr: Number of DMRs.
n_corr: Fraction of DMRs that show significant correlation to expression of their nearest genes.
dist_tss: Fraction of DMRs in each interval of distance to the nearest TSS (<1kb, 1kb~5kb, 5kb~10kb, >10kb).
gene_anno: Fraction of DMRs that overlap to genes or intergenic regions.
cgi_anno: Fraction of DMRs that overlap CGIs (CpG islands) or CGI shores.
mat_enrich_gf: Enrichment to various genomic features, based on a permutation-based method.
mat_pct_st: Fraction of DMRs that overlap to genomic segments in different chromatin states.
mat_enrich_st: Enrichment to various chromatin segmentations, based on a permutation-based method.

For simplicity, we attach data to the search list so that we do not need to type data$ to access its elements.

# Put the elements of `data` on the search list so the chunks below can refer to
# them by bare name (mean_meth, n_gr, ...).
# NOTE(review): objects of the same name in the global environment would
# presumably take precedence over the attached ones - confirm before reuse.
attach(data)

We will create a visualization that links these nine different measurements.

As they are either numeric vectors or matrices, let’s first plot each of them separately.

We use heatmap for mean methylation.

library(circlize)
library(ComplexHeatmap)
# Color mapping for methylation values: 0 -> blue, 0.5 -> white, 1 -> red.
meth_col_fun <- colorRamp2(
    breaks = c(0, 0.5, 1),
    colors = c("blue", "white", "red")
)

# Mean methylation (tumor vs. normal) with rows and columns kept in their given order.
Heatmap(
    mean_meth,
    col = meth_col_fun,
    cluster_columns = FALSE,
    cluster_rows = FALSE
)

Barplot for the number of DMRs.

# One bar per DMR group, 4 cm wide; a lone annotation is concatenated with NULL
# so that it forms a list that can be drawn on its own.
rowAnnotation(
    n_gr = anno_barplot(n_gr),
    width = unit(4, "cm")
) + NULL

Stacked barplot for the n_corr.

# n_corr is a matrix, so its columns are drawn as stacked bar segments per row.
rowAnnotation(
    n_corr = anno_barplot(n_corr),
    width = unit(4, "cm")
) + NULL

Stacked barplot for the dist_tss.

# Stacked bars built from the columns of dist_tss, one bar per row.
rowAnnotation(
    dist_tss = anno_barplot(dist_tss),
    width = unit(4, "cm")
) + NULL

Stacked barplot for the gene_anno.

# Stacked bars built from the columns of gene_anno, one bar per row.
rowAnnotation(
    gene_anno = anno_barplot(gene_anno),
    width = unit(4, "cm")
) + NULL

Stacked barplot for the cgi_anno.

# Stacked bars built from the columns of cgi_anno, one bar per row.
rowAnnotation(
    cgi_anno = anno_barplot(cgi_anno),
    width = unit(4, "cm")
) + NULL

Heatmap for mat_enrich_gf.

# Enrichment to genomic features, default colors, no reordering of rows or columns.
Heatmap(
    mat_enrich_gf,
    cluster_columns = FALSE,
    cluster_rows = FALSE
)

Stacked barplot for the mat_pct_st.

# Stacked bars built from the columns of mat_pct_st, one bar per row.
rowAnnotation(
    mat_pct_st = anno_barplot(mat_pct_st),
    width = unit(4, "cm")
) + NULL

Heatmap for mat_enrich_st.

# Enrichment to chromatin states, default colors, no reordering of rows or columns.
Heatmap(
    mat_enrich_st,
    cluster_columns = FALSE,
    cluster_rows = FALSE
)

Since these nine objects have the same row order (i.e. their rows, or elements, already correspond to each other), we can simply concatenate all nine heatmaps and barplot annotations to build a comprehensive visualization:

# Concatenate all nine components horizontally, in this order: mean methylation,
# the five barplot annotations, genomic-feature enrichment, chromatin-state
# fractions, and chromatin-state enrichment. Annotations are 2 cm wide each.
Heatmap(
    mean_meth,
    col = meth_col_fun,
    cluster_columns = FALSE,
    cluster_rows = FALSE
) +
    rowAnnotation(
        n_gr = anno_barplot(n_gr),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        n_corr = anno_barplot(n_corr),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        dist_tss = anno_barplot(dist_tss),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        gene_anno = anno_barplot(gene_anno),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        cgi_anno = anno_barplot(cgi_anno),
        width = unit(2, "cm")
    ) +
    Heatmap(
        mat_enrich_gf,
        cluster_columns = FALSE,
        cluster_rows = FALSE
    ) +
    rowAnnotation(
        mat_pct_st = anno_barplot(mat_pct_st),
        width = unit(2, "cm")
    ) +
    Heatmap(
        mat_enrich_st,
        cluster_columns = FALSE,
        cluster_rows = FALSE
    )

Splitting the rows into hyper and hypo groups can emphasize the difference between these two groups of DMRs.

# Strip the trailing subtype digits from the row names so that each row is
# labelled only by its DMR direction; this vector drives the row split.
row_split <- gsub("\\d+$", "", rownames(mean_meth))

# Same concatenation as before, with the split specified on the first heatmap.
Heatmap(
    mean_meth,
    col = meth_col_fun,
    row_split = row_split,
    cluster_columns = FALSE,
    cluster_rows = FALSE
) +
    rowAnnotation(
        n_gr = anno_barplot(n_gr),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        n_corr = anno_barplot(n_corr),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        dist_tss = anno_barplot(dist_tss),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        gene_anno = anno_barplot(gene_anno),
        width = unit(2, "cm")
    ) +
    rowAnnotation(
        cgi_anno = anno_barplot(cgi_anno),
        width = unit(2, "cm")
    ) +
    Heatmap(
        mat_enrich_gf,
        cluster_columns = FALSE,
        cluster_rows = FALSE
    ) +
    rowAnnotation(
        mat_pct_st = anno_barplot(mat_pct_st),
        width = unit(2, "cm")
    ) +
    Heatmap(
        mat_enrich_st,
        cluster_columns = FALSE,
        cluster_rows = FALSE
    )

That is the basic structure of the visualization. We can do some customizations, especially the colors for different components in the heatmap list.

# Fill colors for the stacked barplots; each vector has one color per column of
# the matrix it is used with, in column order.
corr_col <- c("green", "red")
dist_tss_col <- c(
    "#FF0000",
    "#FF7352",
    "#FFB299",
    "#FFD9CB"
)
gene_anno_col <- c("green", "blue")
cgi_anno_col <- c("#FFA500", "#FFD191")

# Shared color mapping for both enrichment heatmaps: -200 -> green, 0 -> white, 200 -> red.
z_score_col_fun <- colorRamp2(
    breaks = c(-200, 0, 200),
    colors = c("green", "white", "red")
)

# One color per chromatin state (seven states).
state_col <- c(
    "#FF0000",
    "#008000",
    "#C2E105",
    "#8A91D0",
    "#CD5C5C",
    "#808080",
    "#000000"
)

# Common width shared by every barplot annotation in the final figure.
anno_width <- unit(3.4, "cm")

# The list is built left to right, one component per statement.

# 1. Row labels as a text annotation, right-aligned at the right edge of its cell.
ht_list <- rowAnnotation(
    text = anno_text(
        label,
        location = unit(1, "npc"),
        just = "right",
        gp = gpar(fontsize = 12)
    )
)

# 2. Mean methylation heatmap; 4 mm per column, horizontal legend.
ht_list <- ht_list + Heatmap(
    mean_meth,
    name = "mean_meth",
    col = meth_col_fun,
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    row_title = NULL,
    show_row_names = FALSE,
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 9),
    heatmap_legend_param = list(
        title = "Methylation",
        direction = "horizontal",
        legend_width = unit(3, "cm")
    ),
    width = ncol(mean_meth) * unit(4, "mm")
)

# 3. Number of DMRs (single-series barplot, default fill).
ht_list <- ht_list + rowAnnotation(
    "n_gr" = anno_barplot(n_gr, bar_width = 1, width = anno_width),
    show_annotation_name = FALSE
)

# 4. Fraction of DMRs correlated with gene expression.
ht_list <- ht_list + rowAnnotation(
    "n_corr" = anno_barplot(
        n_corr,
        bar_width = 1,
        gp = gpar(fill = corr_col),
        width = anno_width
    ),
    show_annotation_name = FALSE
)

# 5. Distance to the nearest TSS.
ht_list <- ht_list + rowAnnotation(
    "dist_tss" = anno_barplot(
        dist_tss,
        bar_width = 1,
        gp = gpar(fill = dist_tss_col),
        width = anno_width
    ),
    show_annotation_name = FALSE
)

# 6. Gene vs. intergenic annotation.
ht_list <- ht_list + rowAnnotation(
    "gene_anno" = anno_barplot(
        gene_anno,
        bar_width = 1,
        gp = gpar(fill = gene_anno_col),
        width = anno_width
    ),
    show_annotation_name = FALSE
)

# 7. CGI annotation.
ht_list <- ht_list + rowAnnotation(
    "cgi_anno" = anno_barplot(
        cgi_anno,
        bar_width = 1,
        gp = gpar(fill = cgi_anno_col),
        width = anno_width
    ),
    show_annotation_name = FALSE
)

# 8. Enrichment to genomic features; its legend is suppressed here because a
#    shared Z-score legend is supplied separately when drawing.
ht_list <- ht_list + Heatmap(
    mat_enrich_gf,
    name = "enrich_gf",
    col = z_score_col_fun,
    cluster_columns = FALSE,
    width = unit(ncol(mat_enrich_gf) * 4, "mm"),
    column_title = "",
    column_names_rot = 45,
    column_names_gp = gpar(fontsize = 9),
    show_heatmap_legend = FALSE
)

# 9. Fraction of DMRs per chromatin state.
ht_list <- ht_list + rowAnnotation(
    "pct_st" = anno_barplot(
        mat_pct_st,
        bar_width = 1,
        gp = gpar(fill = state_col),
        width = anno_width
    ),
    show_annotation_name = FALSE
)

# 10. Enrichment to chromatin states; column names are colored with the state colors.
ht_list <- ht_list + Heatmap(
    mat_enrich_st,
    name = "enrich_st",
    col = z_score_col_fun,
    cluster_columns = FALSE,
    width = unit(ncol(mat_enrich_st) * 4, "mm"),
    column_title = "",
    show_heatmap_legend = FALSE,
    column_names_gp = gpar(col = state_col, fontsize = 9),
    show_row_names = FALSE,
    column_names_rot = 45
)

# Manually built legends for the components whose own legends are not drawn:
# the stacked barplot annotations and the two enrichment heatmaps.
# Each legend takes its fill colors from the same variable used for the
# corresponding bars, so the legend cannot drift from the plot when a palette
# is edited.
lgd_list <- list(
    # Colors follow the column order of n_corr (neg, pos).
    Legend(
        labels = c("negative", "positive"),
        title = "Correlation",
        legend_gp = gpar(fill = corr_col)
    ),
    Legend(
        labels = c("gene", "intergenic"),
        title = "Gene annotation",
        legend_gp = gpar(fill = gene_anno_col)
    ),
    # Four distance bins, laid out in two rows to save horizontal space.
    Legend(
        labels = c("<1kb", "1kb~5kb", "5kb~10kb", ">10kb"),
        title = "Distance to TSS",
        legend_gp = gpar(fill = dist_tss_col),
        nrow = 2
    ),
    Legend(
        labels = c("CGI", "CGI shore"),
        title = "CGI annotation",
        legend_gp = gpar(fill = cgi_anno_col)
    ),
    # One continuous legend shared by both enrichment heatmaps.
    Legend(
        col_fun = z_score_col_fun,
        title = "Z-score",
        direction = "horizontal",
        at = c(-200, 0, 200),
        legend_width = unit(3, "cm")
    ),
    Legend(
        labels = colnames(mat_enrich_st),
        title = "Chromatin states",
        legend_gp = gpar(fill = state_col),
        nrow = 2
    )
)


# Draw the list split by DMR direction, with the custom legends at the bottom.
# The third padding value (16 mm) leaves room above the plot for the titles
# added by the decoration calls below.
draw(
    ht_list,
    row_split = row_split,
    padding = unit(c(2, 2, 16, 2), "mm"),
    heatmap_legend_list = lgd_list,
    heatmap_legend_side = "bottom"
)

# Titles for the barplot annotations, keyed by annotation name.
anno_title <- c(
    "n_gr" = "Number of\nDMRs",
    "n_corr" = "Significantly\ncorrelated genes",
    "gene_anno" = "Gene annotation",
    "dist_tss" = "Distance to TSS",
    "cgi_anno" = "CGI annotation",
    "pct_st" = "Overlap to\nChromatin states"
)
# Write each title 3 mm above the top edge of its annotation.
for (anno_name in names(anno_title)) {
    decorate_annotation(anno_name, {
        grid.text(
            anno_title[anno_name],
            y = unit(1, "npc") + unit(3, "mm"),
            just = "bottom"
        )
    })
}

# Titles for the heatmaps, keyed by heatmap name.
ht_title <- c(
    "mean_meth" = "Mean\nmethylation",
    "enrich_gf" = "Enrichment to\ngenomic features",
    "enrich_st" = "Enrichment to\nchromatin states"
)
# Write each title 3 mm above the top edge of its heatmap body.
for (ht_name in names(ht_title)) {
    decorate_heatmap_body(ht_name, {
        grid.text(
            ht_title[ht_name],
            y = unit(1, "npc") + unit(3, "mm"),
            just = "bottom"
        )
    })
}

Landscape Visualization

Zuguang Gu z.gu@dkfz.de

2025-06-25

Purpose

Analysis

Reference