Example of how performing some permutations

rm(list=ls())

if(!require(DADo))
  devtools::install_github("marzuf/MANUSCRIPT_FIGURES", subdir="DADo")
  # alternatively: 
  # install.packages("DADo_0.0.0.1.tar.gz", repos = NULL, type ="source")
 # data("norm_ID")
library(DADo)
library(doMC)
library(foreach)
nCpu <- 2
registerDoMC(nCpu)

Prepare the data

# table from gene-level DE analysis:
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_fpkmDT") # this loads fpkmDT
data("ENCSR489OCU_NCI-H460_40kb_TCGAluad_norm_luad_pipeline_geneList") # this loads pipeline_geneList

# table with gene-to-domain assignment
gene2tad_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_genes_positions.txt", package = "DADo"),
                          stringsAsFactors = FALSE,
                          header=FALSE, 
                          col.names=c("entrezID", "chromosome", "start", "end", "region"))
gene2tad_dt$entrezID <- as.character(gene2tad_dt$entrezID)

# take only the genes used in the pipeline
pip_g2t_dt <- gene2tad_dt[gene2tad_dt$entrezID %in% pipeline_geneList,]
head(pip_g2t_dt)
#>    entrezID chromosome   start     end     region
#> 5     22982      chr10  320130  735621 chr10_TAD2
#> 8     23185      chr10  852854  977645 chr10_TAD2
#> 10    23560      chr10 1034349 1063708 chr10_TAD2
#> 11    91734      chr10 1064847 1071799 chr10_TAD2
#> 12    55853      chr10 1068577 1090141 chr10_TAD2
#> 13     3422      chr10 1085963 1095061 chr10_TAD2

# table with domain positions
tad_pos_dt <- read.delim(system.file("extdata", "ENCSR489OCU_NCI-H460_all_assigned_regions.txt", package = "DADo"),
                          stringsAsFactors = FALSE,
                          header=FALSE, 
                          col.names=c("chromosome", "region", "start", "end"))
stopifnot(pip_g2t_dt$region %in% tad_pos_dt$region)
pip_tad_pos_dt <- tad_pos_dt[tad_pos_dt$region %in% pip_g2t_dt$region,]
head(pip_tad_pos_dt)
#>    chromosome      region    start      end
#> 3       chr10  chr10_TAD2   720001  1120000
#> 17      chr10 chr10_TAD16  4920001  5120000
#> 19      chr10 chr10_TAD18  5520001  6000000
#> 21      chr10 chr10_TAD20  6080001  6320000
#> 28      chr10 chr10_TAD27  7480001  7840000
#> 43      chr10 chr10_TAD42 12080001 12280000

Gene-to-domain permutation

nExprClass <- 5
nPermut <- 10
aggFunc <- "median"

permut_dt <- get_gene2tad_multiPermut(g2TADdt=pip_g2t_dt, 
                                          RNAdt=fpkmDT, 
                                          geneIDlist=pipeline_geneList, 
                                          nClass = nExprClass, 
                                          TADonly=FALSE, # already filtred 
                                          nSimu=nPermut, 
                                          withExprClass=TRUE, 
                                          nCpu=nCpu, 
                                          aggregFun = aggFunc)
#> Warning in get_gene2tad_multiPermut(g2TADdt = pip_g2t_dt, RNAdt = fpkmDT, :
#> geneIDlist argument should correspond to rownames of RNAdt
#> Warning in get_gene2tad_multiPermut(g2TADdt = pip_g2t_dt, RNAdt = fpkmDT, :
#> duplicated - ambiguous - are removed !!!
#> Warning in get_ShuffledPositions_vFunct(g2TADdt = g2TADdt, geneIDlist =
#> geneIDlist, : geneIDlist argument should correspond to rownames of RNAdt
#> Warning in get_ShuffledPositions_vFunct(g2TADdt = g2TADdt, geneIDlist =
#> geneIDlist, : duplicated - ambiguous - are removed !!!
head_sq(permut_dt)
#>   entrezID      permut1      permut2      permut3      permut4
#> 1    10357  chr3_TAD241  chr4_TAD416  chr1_TAD354  chr3_TAD404
#> 2       14 chr12_TAD210   chr19_TAD8 chr14_TAD159 chr20_TAD117
#> 3       16  chr16_TAD75 chr10_TAD336  chr1_TAD425  chr2_TAD461
#> 4    60496  chr4_TAD455  chr6_TAD129 chr11_TAD244 chr10_TAD336
#> 5    79963  chr5_TAD584 chr20_TAD109  chr1_TAD130  chr20_TAD25
rownames(permut_dt) <- permut_dt$entrezID
permut_dt$entrezID <- NULL

stopifnot(ncol(permut_dt) == nPermut)
stopifnot(setequal(rownames(permut_dt), pipeline_geneList))
save(permut_dt, file="package_permut_dt.RData", version=2)

Sampling across domain boundaries


sampAcrossBD_data <- getSampleAcrossBD(g2t_DT=pip_g2t_dt, 
                                       tadpos_DT=pip_tad_pos_dt)

str(sampAcrossBD_data[[1]])
#> List of 13
#>  $ tad_genes    : chr [1:13] "100133331" "79854" "643837" "284593" ...
#>  $ genes        : chr [1:13] "64856" "219293" "83858" "55210" ...
#>  $ nGenes       : int 13
#>  $ minDist      : num 534582
#>  $ maxDist      : num 1116480
#>  $ genes_left   : chr(0) 
#>  $ nGenes_left  : int 0
#>  $ minDist_left : num Inf
#>  $ maxDist_left : num -Inf
#>  $ genes_right  : chr [1:13] "64856" "219293" "83858" "55210" ...
#>  $ nGenes_right : int 13
#>  $ minDist_right: num 534582
#>  $ maxDist_right: num 1116480
stopifnot(length(sampAcrossBD_data) == length(unique(pip_g2t_dt$region)))
save(sampAcrossBD_data, file="package_sampAcrossBD_data.RData", version=2)