rm(list=ls())
# devtools::install_github("CSOgroup/DADo", subdir="DADo")
#
# if(!require(DADo))
# devtools::install_github("CSOgroup/DADo", subdir="DADo")
# alternatively:
# install.packages("DADo_0.0.0.1.tar.gz", repos = NULL, type ="source")
# data("norm_ID")
library(DADo)
data("conserved_region_130_genes_plot_dt") # this loads genes_plot_dt
head(genes_plot_dt)
#> symbol start end chromo count
#> 5 GIMAP8 150147748 150176483 chr7 9
#> 4 GIMAP7 150211945 150218161 chr7 27
#> 2 GIMAP4 150264458 150271043 chr7 27
#> 3 GIMAP6 150322463 150329736 chr7 27
#> 1 GIMAP2 150382690 150390729 chr7 25
#> 6 GIMAP1 150413645 150421368 chr7 13
data("conserved_region_130_tads_plot_dt") # this loads tads_plot_dt
head(tads_plot_dt)
#> dataset dsCat cond1 cond2 upCond region
#> 4 ENCSR401TBQ_Caki2 norm_vs_tumor norm kich norm chr7_TAD567
#> 9 ENCSR489OCU_NCI-H460 norm_vs_tumor norm lusc norm chr7_TAD556
#> 16 GSE105381_HepG2 norm_vs_tumor norm lihc norm chr7_TAD515
#> 1 ENCSR079VIJ_G401 norm_vs_tumor norm kich norm chr7_TAD560
#> 18 GSE99051_786_O norm_vs_tumor norm kich norm chr7_TAD571
#> 6 ENCSR444WCZ_A549 norm_vs_tumor norm luad norm chr7_TAD553
#> start end chromo
#> 4 150120001 150640000 chr7
#> 9 150160001 150680000 chr7
#> 16 150120001 150440000 chr7
#> 1 150120001 150400000 chr7
#> 18 150120001 150400000 chr7
#> 6 150160001 150400000 chr7
plot_conservedRegions(genes_dt=genes_plot_dt,
tads_dt=tads_plot_dt,
dsCat_cols = setNames(c("firebrick3", "navy", "gray50"), c("wt_vs_mut", "norm_vs_tumor", "subtypes")))
NB: see the Domain-level statistics vignette for computing domain meanLogFC and mean intra-correlation.
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_all_meanCorr_TAD") # this loads all_meanCorr_TAD
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_all_meanLogFC_TAD") # this loads all_meanLogFC_TAD
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_emp_pval_combined") # this loads emp_pval_combined
plot_volcanoTADsCorrFC(meanCorr=all_meanCorr_TAD,
meanFC=all_meanLogFC_TAD,
comb_pval=emp_pval_combined,
tads_to_annot = "chr11_TAD390")
#> ... adjust p-values with BH method
#> ... kept meanCorr values: 1774/1774
#> ... kept meanFC values: 1774/1774
#> ... kept comb. p-val values: 1774/1774
## Prepare the observed ratioDown data
# table from gene-level DE analysis:
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_DE_topTable") # this loads DE_topTable
DE_topTable$genes <- as.character(DE_topTable$genes)
# list of genes used in the pipeline
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_pipeline_geneList") # this loads pipeline_geneList
# for those genes, I have logFC data
stopifnot(names(pipeline_geneList) %in% DE_topTable$genes)
DE_topTable <- DE_topTable[DE_topTable$genes %in% names(pipeline_geneList),]
DE_topTable$entrezID <- pipeline_geneList[DE_topTable$genes]
stopifnot(!is.na(DE_topTable$entrezID))
# table with gene-to-domain assignment
gene2tad_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_genes_positions.txt", package = "DADo"),
stringsAsFactors = FALSE,
header=FALSE,
col.names=c("entrezID", "chromosome", "start", "end", "region"))
gene2tad_dt$entrezID <- as.character(gene2tad_dt$entrezID)
# take only the genes used in the pipeline
pip_g2t_dt <- gene2tad_dt[gene2tad_dt$entrezID %in% pipeline_geneList,]
# merge to match gene-to-TAD and logFC
merged_dt <- merge(pip_g2t_dt[,c("entrezID", "region")], DE_topTable[,c("logFC", "entrezID")], by="entrezID", all.x=TRUE, all.y=FALSE)
stopifnot(!is.na(merged_dt))
# compute the ratioDown for each domain (cf. domain-level statistics vignette !)
ratioDown_dt <- aggregate(logFC ~ region, FUN=get_ratioDown, data=merged_dt)
colnames(ratioDown_dt)[colnames(ratioDown_dt) == "logFC"] <- "ratioDown"
obs_ratioDown <- setNames(ratioDown_dt$ratioDown, ratioDown_dt$region)
save(obs_ratioDown, file="package_obs_ratioDown.RData", version=2)
require(foreach)
## Prepare the ratioDown for permutation data
data("cut1000_ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_permutationsDT") # this loads permutationsDT
head_sq(permutationsDT)
#> region1 region2 region3 region4 region5
#> 8927 chr16_TAD282 chr9_TAD349 chr19_TAD48 chr2_TAD813 chr19_TAD193
#> 221400 chr15_TAD67 chr19_TAD195 chr1_TAD147 chr1_TAD413 chr13_TAD95
#> 2196 chr17_TAD110 chr13_TAD96 chr15_TAD123 chr10_TAD293 chr2_TAD277
#> 379013 chr15_TAD121 chr19_TAD197 chr9_TAD449 chr3_TAD597 chr18_TAD238
#> 130399 chr5_TAD282 chr12_TAD415 chr10_TAD153 chr16_TAD102 chr22_TAD92
stopifnot(setequal(rownames(permutationsDT), pipeline_geneList))
tad_levels <- as.character(ratioDown_dt$region)
all_permut_ratioDown_dt <- foreach(i = 1:ncol(permutationsDT), .combine='cbind') %dopar% {
perm_g2t <- data.frame(entrezID=as.character(rownames(permutationsDT)),
region = as.character(permutationsDT[,i]),
stringsAsFactors = FALSE)
perm_merged_dt <- merge(perm_g2t, DE_topTable[,c("logFC", "entrezID")], by="entrezID", all.x=TRUE, all.y=FALSE)
stopifnot(!is.na(perm_merged_dt))
# compute the FCC for each domain
permut_rD_dt <- aggregate(logFC ~ region, FUN=get_ratioDown, data=perm_merged_dt)
colnames(permut_rD_dt)[colnames(permut_rD_dt) == "logFC"] <- "ratioDown"
rownames(permut_rD_dt) <- as.character(permut_rD_dt$region)
stopifnot(setequal(rownames(permut_rD_dt), tad_levels))
out_dt <- permut_rD_dt[tad_levels, "ratioDown", drop=FALSE]
stopifnot(out_dt$ratioDown >= 0 & out_dt$ratioDown <= 1) # just to check...
out_dt
}
head_sq(all_permut_ratioDown_dt)
#> ratioDown ratioDown.1 ratioDown.2 ratioDown.3 ratioDown.4
#> chr1_TAD1 0.5384615 0.3846154 0.3076923 0.7692308 0.4615385
#> chr1_TAD100 0.6666667 0.3333333 0.5000000 0.6666667 0.3333333
#> chr1_TAD101 0.5000000 0.2500000 0.0000000 0.5000000 0.5000000
#> chr1_TAD102 0.6666667 0.6666667 0.3333333 0.3333333 0.3333333
#> chr1_TAD109 0.3333333 0.6666667 0.3333333 0.6666667 0.0000000
## Do the plots:
myplots <- plot_smileRatioDownConcord(observed_rD=obs_ratioDown,
permut_rD_dt=all_permut_ratioDown_dt,
plotTit1=paste0("NCI-H460 - TCGA normal vs. LUAD")
)
Line version of the “smile plot”:
myplots[[1]]
Histogram version of the “smile plot”:
myplots[[2]]
see the Domain-level statistics vignette !