knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir="E:/DISC/reproducibility")
utilities_path = "./source/utilities.r"
source(utilities_path)
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table
reldist: Relative Distribution Methods
Version 1.6-6 created on 2016-10-07.
copyright (c) 2003, Mark S. Handcock, University of California-Los Angeles
For citation information, type citation("reldist").
Type help(package="reldist") to get started.
Here, we use PBMC dataset as an example. The fraction of sampling is set to 50% of the original library size across cells. Only 1 replicate will be generated here as an example. Only imputed genes will be kept for comparison.
used_datasets = c("MELANOMA", "SSCORTEX", "CBMC", "PBMC")
method_names = c("DISC", "scVI", "MAGIC", "DCA", "scScope", "DeepImpute", "VIPER", "scImpute")
output_dir = "./results/Identification_of_true_zeros"
dir.create(output_dir, showWarnings = F, recursive = T)
data_list = list()
for(ii in used_datasets){
data_list[[ii]] = list()
raw_data = readh5_loom(paste0("./data/", ii, "/raw.loom"))
vst_file = paste0(output_dir, "/", ii, "_vst_gene.tsv")
if(file.exists(vst_file)){
hvg_info = read.table(vst_file)
print("load vst_file")
}else{
hvg_info = FindVariableFeatures_vst_by_genes(raw_data)
hvg_info = hvg_info[order(hvg_info$variance.standardized, decreasing = T), ]
write.table(hvg_info, vst_file, sep = "\t", quote = F, row.names = T, col.names = T)
}
ds_dir = paste0("./data/", ii, "/ds_0.5/r1")
observed_data = readh5_loom(paste0(ds_dir, "/gene_selection.loom"))
compared_genes = rownames(observed_data)
top_1000_genes = rownames(hvg_info)[rownames(hvg_info) %in% compared_genes][1:1000]
raw_data = raw_data[compared_genes, ]
for(jj in method_names){
if(jj == "DISC"){
imputation = readh5_loom(paste0(ds_dir, "/", jj, ".loom"))[compared_genes, ]
}else{
imputation = readh5_imputation(paste0(ds_dir, "/", jj, ".hdf5"))[compared_genes, ]
}
imputation_cp10k = sweep(imputation, 2, 10000 / colSums(imputation), "*")
raw_zero = raw_data == 0 & observed_data == 0
induced_zero = raw_data != 0 & observed_data == 0
kept_values = raw_data != 0 & observed_data != 0
data_list[[ii]][[jj]] = list(raw_zero_imputation = imputation_cp10k[raw_zero],
induced_zero_imputation = imputation_cp10k[induced_zero],
top_raw_zero_imputation = imputation_cp10k[top_1000_genes, ][raw_zero[top_1000_genes, ]],
top_induced_zero_imputation = imputation_cp10k[top_1000_genes, ][induced_zero[top_1000_genes, ]])
cat("Finish: ", ii, " - ", jj, "\n")
}
cat("Finish: ", ii, "\n")
}
[1] "./data/MELANOMA/raw.loom"
[1] 32287 8498
[1] "load vst_file"
[1] "./data/MELANOMA/ds_0.5/r1/gene_selection.loom"
[1] 12509 8498
[1] "./data/MELANOMA/ds_0.5/r1/DISC.loom"
[1] 32287 8498
Finish: MELANOMA - DISC
[1] "./data/MELANOMA/ds_0.5/r1/scVI.hdf5"
[1] 12509 8498
Finish: MELANOMA - scVI
[1] "./data/MELANOMA/ds_0.5/r1/MAGIC.hdf5"
[1] 12509 8498
Finish: MELANOMA - MAGIC
[1] "./data/MELANOMA/ds_0.5/r1/DCA.hdf5"
[1] 12509 8498
Finish: MELANOMA - DCA
[1] "./data/MELANOMA/ds_0.5/r1/scScope.hdf5"
[1] 12509 8498
Finish: MELANOMA - scScope
[1] "./data/MELANOMA/ds_0.5/r1/DeepImpute.hdf5"
[1] 12509 8498
Finish: MELANOMA - DeepImpute
[1] "./data/MELANOMA/ds_0.5/r1/VIPER.hdf5"
[1] 12509 8498
Finish: MELANOMA - VIPER
[1] "./data/MELANOMA/ds_0.5/r1/scImpute.hdf5"
[1] 12509 8498
Finish: MELANOMA - scImpute
Finish: MELANOMA
[1] "./data/SSCORTEX/raw.loom"
[1] 27998 7416
[1] "load vst_file"
[1] "./data/SSCORTEX/ds_0.5/r1/gene_selection.loom"
[1] 12584 7416
[1] "./data/SSCORTEX/ds_0.5/r1/DISC.loom"
[1] 27998 7416
Finish: SSCORTEX - DISC
[1] "./data/SSCORTEX/ds_0.5/r1/scVI.hdf5"
[1] 12584 7416
Finish: SSCORTEX - scVI
[1] "./data/SSCORTEX/ds_0.5/r1/MAGIC.hdf5"
[1] 12584 7416
Finish: SSCORTEX - MAGIC
[1] "./data/SSCORTEX/ds_0.5/r1/DCA.hdf5"
[1] 12584 7416
Finish: SSCORTEX - DCA
[1] "./data/SSCORTEX/ds_0.5/r1/scScope.hdf5"
[1] 12584 7416
Finish: SSCORTEX - scScope
[1] "./data/SSCORTEX/ds_0.5/r1/DeepImpute.hdf5"
[1] 12584 7416
Finish: SSCORTEX - DeepImpute
[1] "./data/SSCORTEX/ds_0.5/r1/VIPER.hdf5"
[1] 12584 7416
Finish: SSCORTEX - VIPER
[1] "./data/SSCORTEX/ds_0.5/r1/scImpute.hdf5"
[1] 12584 7416
Finish: SSCORTEX - scImpute
Finish: SSCORTEX
[1] "./data/CBMC/raw.loom"
[1] 20400 8005
[1] "load vst_file"
[1] "./data/CBMC/ds_0.5/r1/gene_selection.loom"
[1] 9273 8005
[1] "./data/CBMC/ds_0.5/r1/DISC.loom"
[1] 20400 8005
Finish: CBMC - DISC
[1] "./data/CBMC/ds_0.5/r1/scVI.hdf5"
[1] 9273 8005
Finish: CBMC - scVI
[1] "./data/CBMC/ds_0.5/r1/MAGIC.hdf5"
[1] 9273 8005
Finish: CBMC - MAGIC
[1] "./data/CBMC/ds_0.5/r1/DCA.hdf5"
[1] 9273 8005
Finish: CBMC - DCA
[1] "./data/CBMC/ds_0.5/r1/scScope.hdf5"
[1] 9273 8005
Finish: CBMC - scScope
[1] "./data/CBMC/ds_0.5/r1/DeepImpute.hdf5"
[1] 9273 8005
Finish: CBMC - DeepImpute
[1] "./data/CBMC/ds_0.5/r1/VIPER.hdf5"
[1] 9273 8005
Finish: CBMC - VIPER
[1] "./data/CBMC/ds_0.5/r1/scImpute.hdf5"
[1] 9273 8005
Finish: CBMC - scImpute
Finish: CBMC
[1] "./data/PBMC/raw.loom"
[1] 32738 2638
[1] "load vst_file"
[1] "./data/PBMC/ds_0.5/r1/gene_selection.loom"
[1] 6403 2638
[1] "./data/PBMC/ds_0.5/r1/DISC.loom"
[1] 32738 2638
Finish: PBMC - DISC
[1] "./data/PBMC/ds_0.5/r1/scVI.hdf5"
[1] 6403 2638
Finish: PBMC - scVI
[1] "./data/PBMC/ds_0.5/r1/MAGIC.hdf5"
[1] 6403 2638
Finish: PBMC - MAGIC
[1] "./data/PBMC/ds_0.5/r1/DCA.hdf5"
[1] 6403 2638
Finish: PBMC - DCA
[1] "./data/PBMC/ds_0.5/r1/scScope.hdf5"
[1] 6403 2638
Finish: PBMC - scScope
[1] "./data/PBMC/ds_0.5/r1/DeepImpute.hdf5"
[1] 6403 2638
Finish: PBMC - DeepImpute
[1] "./data/PBMC/ds_0.5/r1/VIPER.hdf5"
[1] 6403 2638
Finish: PBMC - VIPER
[1] "./data/PBMC/ds_0.5/r1/scImpute.hdf5"
[1] 6403 2638
Finish: PBMC - scImpute
Finish: PBMC
Following this paper (SAVER: gene expression recovery for single-cell RNA sequencing, https://www.nature.com/articles/s41592-018-0033-z), we evaluate the performance of different imputation methods to indentify true zeros.
par(mfrow = c(1, length(used_datasets)), mar = c(5, 0, 0.5, 0), oma = c(0, 5, 6, 2), mgp = c(3.5, 1, 0), cex.axis = 1.1, cex.lab = 1.5, font.lab = 2, cex.main = 1.5)
mtext_position = seq(from = 0, to = 1, by = 1 / length(used_datasets) / 2)[seq(length(used_datasets)) * 2]
names(mtext_position) = used_datasets
for(ii in used_datasets){
max_value = 0
for(jj in method_names){
max_value = max(c(max_value, data_list[[ii]][[jj]][["raw_zero_imputation"]], data_list[[ii]][[jj]][["induced_zero_imputation"]]))
}
n = 2 ^ (round(log(max_value, base = 2)) + 7)
bw = (max_value * (n + 10)) / (n ^ 2)
to = max_value + (bw * 5)
from = -(bw * 5)
x_max = 0
for(jj in method_names){
data_list[[ii]][[jj]][["raw_zero_density"]] = density(data_list[[ii]][[jj]][["raw_zero_imputation"]], from = from, to = to, n = n, bw = bw)
data_list[[ii]][[jj]][["induced_zero_density"]] = density(data_list[[ii]][[jj]][["induced_zero_imputation"]], from = from, to = to, n = n, bw = bw)
data_list[[ii]][[jj]][["y_max"]] = max(c(data_list[[ii]][[jj]][["raw_zero_density"]]$y, data_list[[ii]][[jj]][["induced_zero_density"]]$y))
cutoff = data_list[[ii]][[jj]][["y_max"]] * 0.005
test_vector = !(data_list[[ii]][[jj]][["raw_zero_density"]]$y < cutoff & data_list[[ii]][[jj]][["induced_zero_density"]]$y < cutoff)
for(kk in seq(from = length(test_vector), to = 1)){
if(test_vector[kk]){
x_max = max(c(x_max, data_list[[ii]][[jj]][["raw_zero_density"]]$x[kk + 1]))
break()
}
}
}
data_list[[ii]][["x_max"]] = x_max
cat("Finish: ", ii, "\n")
}
Finish: MELANOMA
Finish: SSCORTEX
Finish: CBMC
Finish: PBMC
for(ii in method_names){
for(jj in used_datasets){
plot(0, type = "n", xlim = c(-min(c(1, data_list[[jj]][["x_max"]] * 0.05)), data_list[[jj]][["x_max"]]), ylim = c(0, data_list[[jj]][[ii]][["y_max"]]), axes = FALSE, ann = FALSE, frame.plot = TRUE)
lines(data_list[[jj]][[ii]][["raw_zero_density"]], col = "black", lwd = 2)
lines(data_list[[jj]][[ii]][["induced_zero_density"]], col = "red", lwd = 2)
axis(1)
mtext(jj, outer = TRUE, cex = 1.2, font = 2, line = 0.5, at = mtext_position[jj])
cat("Finish: ", ii, " - ", jj, "\n")
}
mtext(paste0(ii, " - All genes"), outer = TRUE, cex = 1.4, font = 2, line = 3)
legend("topright", c("Raw zero", "Sampled zero"), lty = 1, col = c("black", "red"), lwd = 2)
}
Finish: DISC - MELANOMA
Finish: DISC - SSCORTEX
Finish: DISC - CBMC
Finish: DISC - PBMC
Finish: scVI - MELANOMA
Finish: scVI - SSCORTEX
Finish: scVI - CBMC
Finish: scVI - PBMC
Finish: MAGIC - MELANOMA
Finish: MAGIC - SSCORTEX
Finish: MAGIC - CBMC
Finish: MAGIC - PBMC
Finish: DCA - MELANOMA
Finish: DCA - SSCORTEX
Finish: DCA - CBMC
Finish: DCA - PBMC
Finish: scScope - MELANOMA
Finish: scScope - SSCORTEX
Finish: scScope - CBMC
Finish: scScope - PBMC
Finish: DeepImpute - MELANOMA
Finish: DeepImpute - SSCORTEX
Finish: DeepImpute - CBMC
Finish: DeepImpute - PBMC
Finish: VIPER - MELANOMA
Finish: VIPER - SSCORTEX
Finish: VIPER - CBMC
Finish: VIPER - PBMC
Finish: scImpute - MELANOMA
Finish: scImpute - SSCORTEX
Finish: scImpute - CBMC
Finish: scImpute - PBMC
par(mfrow = c(1, length(used_datasets)), mar = c(5, 0, 0.5, 0), oma = c(0, 5, 6, 2), mgp = c(3.5, 1, 0), cex.axis = 1.1, cex.lab = 1.5, font.lab = 2, cex.main = 1.5)
mtext_position = seq(from = 0, to = 1, by = 1 / length(used_datasets) / 2)[seq(length(used_datasets)) * 2]
names(mtext_position) = used_datasets
for(ii in used_datasets){
max_value = 0
for(jj in method_names){
max_value = max(c(max_value, data_list[[ii]][[jj]][["top_raw_zero_imputation"]], data_list[[ii]][[jj]][["top_induced_zero_imputation"]]))
}
n = 2 ^ (round(log(max_value, base = 2)) + 7)
bw = (max_value * (n + 10)) / (n ^ 2)
to = max_value + (bw * 5)
from = -(bw * 5)
x_max = 0
for(jj in method_names){
data_list[[ii]][[jj]][["top_raw_zero_density"]] = density(data_list[[ii]][[jj]][["top_raw_zero_imputation"]], from = from, to = to, n = n, bw = bw)
data_list[[ii]][[jj]][["top_induced_zero_density"]] = density(data_list[[ii]][[jj]][["top_induced_zero_imputation"]], from = from, to = to, n = n, bw = bw)
data_list[[ii]][[jj]][["top_y_max"]] = max(c(data_list[[ii]][[jj]][["top_raw_zero_density"]]$y, data_list[[ii]][[jj]][["top_induced_zero_density"]]$y))
cutoff = data_list[[ii]][[jj]][["top_y_max"]] * 0.005
test_vector = !(data_list[[ii]][[jj]][["top_raw_zero_density"]]$y < cutoff & data_list[[ii]][[jj]][["top_induced_zero_density"]]$y < cutoff)
for(kk in seq(from = length(test_vector), to = 1)){
if(test_vector[kk]){
x_max = max(c(x_max, data_list[[ii]][[jj]][["top_raw_zero_density"]]$x[kk + 1]))
break()
}
}
}
data_list[[ii]][["top_x_max"]] = x_max
cat("Finish: ", ii, "\n")
}
Finish: MELANOMA
Finish: SSCORTEX
Finish: CBMC
Finish: PBMC
for(ii in method_names){
for(jj in used_datasets){
plot(0, type = "n", xlim = c(-min(c(1, data_list[[jj]][["top_x_max"]] * 0.05)), data_list[[jj]][["top_x_max"]]), ylim = c(0, data_list[[jj]][[ii]][["top_y_max"]]), axes = FALSE, ann = FALSE, frame.plot = TRUE)
lines(data_list[[jj]][[ii]][["top_raw_zero_density"]], col = "black", lwd = 2)
lines(data_list[[jj]][[ii]][["top_induced_zero_density"]], col = "red", lwd = 2)
axis(1)
mtext(jj, outer = TRUE, cex = 1.2, font = 2, line = 0.5, at = mtext_position[jj])
cat("Finish: ", ii, " - ", jj, "\n")
}
mtext(paste0(ii, " - Top 1000 genes"), outer = TRUE, cex = 1.4, font = 2, line = 3)
legend("topright", c("Raw zero", "Sampled zero"), lty = 1, col = c("black", "red"), lwd = 2)
}
Finish: DISC - MELANOMA
Finish: DISC - SSCORTEX
Finish: DISC - CBMC
Finish: DISC - PBMC
Finish: scVI - MELANOMA
Finish: scVI - SSCORTEX
Finish: scVI - CBMC
Finish: scVI - PBMC
Finish: MAGIC - MELANOMA
Finish: MAGIC - SSCORTEX
Finish: MAGIC - CBMC
Finish: MAGIC - PBMC
Finish: DCA - MELANOMA
Finish: DCA - SSCORTEX
Finish: DCA - CBMC
Finish: DCA - PBMC
Finish: scScope - MELANOMA
Finish: scScope - SSCORTEX
Finish: scScope - CBMC
Finish: scScope - PBMC
Finish: DeepImpute - MELANOMA
Finish: DeepImpute - SSCORTEX
Finish: DeepImpute - CBMC
Finish: DeepImpute - PBMC
Finish: VIPER - MELANOMA
Finish: VIPER - SSCORTEX
Finish: VIPER - CBMC
Finish: VIPER - PBMC
Finish: scImpute - MELANOMA
Finish: scImpute - SSCORTEX
Finish: scImpute - CBMC
Finish: scImpute - PBMC