Example of how performing some permutations
rm(list=ls())
if(!require(DADo))
devtools::install_github("marzuf/MANUSCRIPT_FIGURES", subdir="DADo")
# alternatively:
# install.packages("DADo_0.0.0.1.tar.gz", repos = NULL, type ="source")
# data("norm_ID")
library(DADo)
library(doMC)
library(foreach)
nCpu <- 2
registerDoMC(nCpu)
Prepare the data
# table from gene-level DE analysis:
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_fpkmDT") # this loads fpkmDT
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_pipeline_geneList") # this loads pipeline_geneList
# table with gene-to-domain assignment
gene2tad_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_genes_positions.txt", package = "DADo"),
stringsAsFactors = FALSE,
header=FALSE,
col.names=c("entrezID", "chromosome", "start", "end", "region"))
gene2tad_dt$entrezID <- as.character(gene2tad_dt$entrezID)
# take only the genes used in the pipeline
pip_g2t_dt <- gene2tad_dt[gene2tad_dt$entrezID %in% pipeline_geneList,]
head(pip_g2t_dt)
#> entrezID chromosome start end region
#> 5 22982 chr10 320130 735621 chr10_TAD2
#> 8 23185 chr10 852854 977645 chr10_TAD2
#> 10 23560 chr10 1034349 1063708 chr10_TAD2
#> 11 91734 chr10 1064847 1071799 chr10_TAD2
#> 12 55853 chr10 1068577 1090141 chr10_TAD2
#> 13 3422 chr10 1085963 1095061 chr10_TAD2
# table with domain positions
tad_pos_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_assigned_regions.txt", package = "DADo"),
stringsAsFactors = FALSE,
header=FALSE,
col.names=c("chromosome", "region", "start", "end"))
stopifnot(pip_g2t_dt$region %in% tad_pos_dt$region)
pip_tad_pos_dt <- tad_pos_dt[tad_pos_dt$region %in% pip_g2t_dt$region,]
head(pip_tad_pos_dt)
#> chromosome region start end
#> 3 chr10 chr10_TAD2 720001 1120000
#> 17 chr10 chr10_TAD16 4920001 5120000
#> 19 chr10 chr10_TAD18 5520001 6000000
#> 21 chr10 chr10_TAD20 6080001 6320000
#> 28 chr10 chr10_TAD27 7480001 7840000
#> 43 chr10 chr10_TAD42 12080001 12280000
Gene-to-domain permutation
nExprClass <- 5
nPermut <- 10
aggFunc <- "median"
permut_dt <- get_gene2tad_multiPermut(g2TADdt=pip_g2t_dt,
RNAdt=fpkmDT,
geneIDlist=pipeline_geneList,
nClass = nExprClass,
TADonly=FALSE, # already filtred
nSimu=nPermut,
withExprClass=TRUE,
nCpu=nCpu,
aggregFun = aggFunc)
#> Warning in get_gene2tad_multiPermut(g2TADdt = pip_g2t_dt, RNAdt = fpkmDT, :
#> geneIDlist argument should correspond to rownames of RNAdt
#> Warning in get_gene2tad_multiPermut(g2TADdt = pip_g2t_dt, RNAdt = fpkmDT, :
#> duplicated - ambiguous - are removed !!!
#> Warning in get_ShuffledPositions_vFunct(g2TADdt = g2TADdt, geneIDlist =
#> geneIDlist, : geneIDlist argument should correspond to rownames of RNAdt
#> Warning in get_ShuffledPositions_vFunct(g2TADdt = g2TADdt, geneIDlist =
#> geneIDlist, : duplicated - ambiguous - are removed !!!
head_sq(permut_dt)
#> entrezID permut1 permut2 permut3 permut4
#> 1 10357 chr3_TAD241 chr4_TAD416 chr1_TAD354 chr3_TAD404
#> 2 14 chr12_TAD210 chr19_TAD8 chr14_TAD159 chr20_TAD117
#> 3 16 chr16_TAD75 chr10_TAD336 chr1_TAD425 chr2_TAD461
#> 4 60496 chr4_TAD455 chr6_TAD129 chr11_TAD244 chr10_TAD336
#> 5 79963 chr5_TAD584 chr20_TAD109 chr1_TAD130 chr20_TAD25
rownames(permut_dt) <- permut_dt$entrezID
permut_dt$entrezID <- NULL
stopifnot(ncol(permut_dt) == nPermut)
stopifnot(setequal(rownames(permut_dt), pipeline_geneList))
save(permut_dt, file="package_permut_dt.RData", version=2)
Sampling across domain boundaries
sampAcrossBD_data <- getSampleAcrossBD(g2t_DT=pip_g2t_dt,
tadpos_DT=pip_tad_pos_dt)
str(sampAcrossBD_data[[1]])
#> List of 13
#> $ tad_genes : chr [1:13] "100133331" "79854" "643837" "284593" ...
#> $ genes : chr [1:13] "64856" "219293" "83858" "55210" ...
#> $ nGenes : int 13
#> $ minDist : num 534582
#> $ maxDist : num 1116480
#> $ genes_left : chr(0)
#> $ nGenes_left : int 0
#> $ minDist_left : num Inf
#> $ maxDist_left : num -Inf
#> $ genes_right : chr [1:13] "64856" "219293" "83858" "55210" ...
#> $ nGenes_right : int 13
#> $ minDist_right: num 534582
#> $ maxDist_right: num 1116480
stopifnot(length(sampAcrossBD_data) == length(unique(pip_g2t_dt$region)))
save(sampAcrossBD_data, file="package_sampAcrossBD_data.RData", version=2)