Example of some plotting possibilities

rm(list=ls())

# devtools::install_github("CSOgroup/DADo", subdir="DADo")
# 
# if(!require(DADo))
#   devtools::install_github("CSOgroup/DADo", subdir="DADo")
  # alternatively: 
  # install.packages("DADo_0.0.0.1.tar.gz", repos = NULL, type ="source")
 # data("norm_ID")
library(DADo)

Conserved region

data("conserved_region_130_genes_plot_dt") # this loads genes_plot_dt
head(genes_plot_dt)
#>   symbol     start       end chromo count
#> 5 GIMAP8 150147748 150176483   chr7     9
#> 4 GIMAP7 150211945 150218161   chr7    27
#> 2 GIMAP4 150264458 150271043   chr7    27
#> 3 GIMAP6 150322463 150329736   chr7    27
#> 1 GIMAP2 150382690 150390729   chr7    25
#> 6 GIMAP1 150413645 150421368   chr7    13

data("conserved_region_130_tads_plot_dt") # this loads tads_plot_dt
head(tads_plot_dt)
#>                 dataset         dsCat cond1 cond2 upCond      region
#> 4     ENCSR401TBQ_Caki2 norm_vs_tumor  norm  kich   norm chr7_TAD567
#> 9  ENCSR489OCU_NCI-H460 norm_vs_tumor  norm  lusc   norm chr7_TAD556
#> 16      GSE105381_HepG2 norm_vs_tumor  norm  lihc   norm chr7_TAD515
#> 1      ENCSR079VIJ_G401 norm_vs_tumor  norm  kich   norm chr7_TAD560
#> 18       GSE99051_786_O norm_vs_tumor  norm  kich   norm chr7_TAD571
#> 6      ENCSR444WCZ_A549 norm_vs_tumor  norm  luad   norm chr7_TAD553
#>        start       end chromo
#> 4  150120001 150640000   chr7
#> 9  150160001 150680000   chr7
#> 16 150120001 150440000   chr7
#> 1  150120001 150400000   chr7
#> 18 150120001 150400000   chr7
#> 6  150160001 150400000   chr7

plot_conservedRegions(genes_dt=genes_plot_dt, 
                      tads_dt=tads_plot_dt,
                      dsCat_cols = setNames(c("firebrick3", "navy", "gray50"), c("wt_vs_mut", "norm_vs_tumor", "subtypes")))

plot of chunk plot_conserved

Volcano-like intra-domain corr. vs. domain meanLogFC

NB: see the Domain-level statistics vignette for computing domain meanLogFC and mean intra-correlation.

data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_all_meanCorr_TAD") # this loads all_meanCorr_TAD
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_all_meanLogFC_TAD") # this loads all_meanLogFC_TAD
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_emp_pval_combined") # this loads emp_pval_combined

plot_volcanoTADsCorrFC(meanCorr=all_meanCorr_TAD, 
                       meanFC=all_meanLogFC_TAD, 
                       comb_pval=emp_pval_combined,
                       tads_to_annot = "chr11_TAD390")
#> ... adjust p-values with BH method
#> ... kept meanCorr values:    1774/1774
#> ... kept meanFC values:  1774/1774
#> ... kept comb. p-val values: 1774/1774

plot of chunk plot_volcano

Domain-level concordance “smile plot”

## Prepare the observed ratioDown data
# table from gene-level DE analysis:
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_DE_topTable") # this loads DE_topTable
DE_topTable$genes <- as.character(DE_topTable$genes)
# list of genes used in the pipeline
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_pipeline_geneList") # this loads pipeline_geneList
# for those genes, I have logFC data
stopifnot(names(pipeline_geneList) %in% DE_topTable$genes)
DE_topTable <- DE_topTable[DE_topTable$genes %in% names(pipeline_geneList),]
DE_topTable$entrezID <- pipeline_geneList[DE_topTable$genes]
stopifnot(!is.na(DE_topTable$entrezID))
# table with gene-to-domain assignment
gene2tad_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_genes_positions.txt", package = "DADo"),
                          stringsAsFactors = FALSE,
                          header=FALSE, 
                          col.names=c("entrezID", "chromosome", "start", "end", "region"))
gene2tad_dt$entrezID <- as.character(gene2tad_dt$entrezID)
# take only the genes used in the pipeline
pip_g2t_dt <- gene2tad_dt[gene2tad_dt$entrezID %in% pipeline_geneList,]
# merge to match gene-to-TAD and logFC
merged_dt <- merge(pip_g2t_dt[,c("entrezID", "region")], DE_topTable[,c("logFC", "entrezID")], by="entrezID", all.x=TRUE, all.y=FALSE)
stopifnot(!is.na(merged_dt))
# compute the ratioDown for each domain (cf. domain-level statistics vignette !)
ratioDown_dt <- aggregate(logFC ~ region, FUN=get_ratioDown, data=merged_dt)
colnames(ratioDown_dt)[colnames(ratioDown_dt) == "logFC"] <- "ratioDown"
obs_ratioDown <- setNames(ratioDown_dt$ratioDown, ratioDown_dt$region)
save(obs_ratioDown, file="package_obs_ratioDown.RData", version=2)
require(foreach)
## Prepare the ratioDown for permutation data
data("cut1000_ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_permutationsDT") # this loads permutationsDT
head_sq(permutationsDT)
#>             region1      region2      region3      region4      region5
#> 8927   chr16_TAD282  chr9_TAD349  chr19_TAD48  chr2_TAD813 chr19_TAD193
#> 221400  chr15_TAD67 chr19_TAD195  chr1_TAD147  chr1_TAD413  chr13_TAD95
#> 2196   chr17_TAD110  chr13_TAD96 chr15_TAD123 chr10_TAD293  chr2_TAD277
#> 379013 chr15_TAD121 chr19_TAD197  chr9_TAD449  chr3_TAD597 chr18_TAD238
#> 130399  chr5_TAD282 chr12_TAD415 chr10_TAD153 chr16_TAD102  chr22_TAD92
stopifnot(setequal(rownames(permutationsDT), pipeline_geneList))
tad_levels <- as.character(ratioDown_dt$region)
all_permut_ratioDown_dt <- foreach(i = 1:ncol(permutationsDT), .combine='cbind') %dopar% {
  perm_g2t <- data.frame(entrezID=as.character(rownames(permutationsDT)),
                         region = as.character(permutationsDT[,i]),
                         stringsAsFactors = FALSE)
  perm_merged_dt <- merge(perm_g2t, DE_topTable[,c("logFC", "entrezID")], by="entrezID", all.x=TRUE, all.y=FALSE)
  stopifnot(!is.na(perm_merged_dt))
  # compute the FCC for each domain
  permut_rD_dt <- aggregate(logFC ~ region, FUN=get_ratioDown, data=perm_merged_dt)
  colnames(permut_rD_dt)[colnames(permut_rD_dt) == "logFC"] <- "ratioDown"
  rownames(permut_rD_dt) <- as.character(permut_rD_dt$region)
  stopifnot(setequal(rownames(permut_rD_dt), tad_levels))
  out_dt <- permut_rD_dt[tad_levels, "ratioDown", drop=FALSE]
  stopifnot(out_dt$ratioDown >= 0 & out_dt$ratioDown <= 1) # just to check...
  out_dt
}
head_sq(all_permut_ratioDown_dt)
#>             ratioDown ratioDown.1 ratioDown.2 ratioDown.3 ratioDown.4
#> chr1_TAD1   0.5384615   0.3846154   0.3076923   0.7692308   0.4615385
#> chr1_TAD100 0.6666667   0.3333333   0.5000000   0.6666667   0.3333333
#> chr1_TAD101 0.5000000   0.2500000   0.0000000   0.5000000   0.5000000
#> chr1_TAD102 0.6666667   0.6666667   0.3333333   0.3333333   0.3333333
#> chr1_TAD109 0.3333333   0.6666667   0.3333333   0.6666667   0.0000000
## Do the plots:
myplots <- plot_smileRatioDownConcord(observed_rD=obs_ratioDown,
                           permut_rD_dt=all_permut_ratioDown_dt,
                           plotTit1=paste0("NCI-H460 - TCGA normal vs. LUAD")
                           )

Line version of the “smile plot”:

myplots[[1]]

plot of chunk plot_line

Histogram version of the “smile plot”:

myplots[[2]]

plot of chunk plot_hist

FCC cumsum curves

see the Domain-level statistics vignette !