Benckmark scSpace with other methods on 140 simulated datasets
library(ggplot2)
library(ggthemes)
library(ggsignif)
library(mclust)
## Package 'mclust' version 6.0.0
## Type 'citation("mclust")' for citing this R package in publications.
library(RColorBrewer)
Load data
# Ground Truth
sim_gt <- readRDS('../data/simulation/sim_groundtruth.RDS')
# scSpace
sim_scspace <- readRDS('../data/simulation/sim_scSpace.RDS')
# Louvain
sim_louvain <- readRDS('../data/simulation/sim_louvain.RDS')
# K-means
sim_kmeans <- readRDS('../data/simulation/sim_kmeans.RDS')
# Hierarchical clustering
sim_hclust <- readRDS('../data/simulation/sim_hclust.RDS')
# scCoGAPS
sim_sccogaps <- readRDS('../data/simulation/sim_scCoGAPS.RDS')
# DR.SC
sim_drsc <- readRDS('../data/simulation/sim_DRSC.RDS')
# BayesSpace
sim_bayesspace <- readRDS('../data/simulation/sim_BayesSpace.RDS')
# SpaGCN
sim_spagcn <- readRDS('../data/simulation/sim_SpaGCN.RDS')
# STAGATE
sim_stagate <- readRDS('../data/simulation/sim_STAGATE.RDS')
Set output dir
output_dir <- '../output/figure1'
if(!dir.exists(output_dir)){
dir.create(output_dir)
} else {
print('Dir already exists!')
}
Calculate ARI of all celltypes
all_res <- data.frame()
for (i in 1:140) {
scspace_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_scspace[[i]]$scSpace)
louvain_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_louvain[[i]]$louvain)
kmeans_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_kmeans[[i]]$kmeans)
hclust_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_hclust[[i]]$hclust)
sccogaps_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_sccogaps[[i]]$scCoGAPS)
drsc_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_drsc[[i]]$spatial.drsc.cluster)
bayesspace_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_bayesspace[[i]]$spatial.cluster)
spagcn_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_spagcn[[i]]$SpaGCN)
stagate_tmp <- adjustedRandIndex(sim_gt[[i]]$Group, sim_stagate[[i]]$STAGATE)
res_tmp <- data.frame(
simulation = names(sim_gt)[i],
ARI = c(scspace_tmp, louvain_tmp, kmeans_tmp, hclust_tmp, sccogaps_tmp,
drsc_tmp, bayesspace_tmp, spagcn_tmp, stagate_tmp),
method = c('scSpace', 'louvain', 'kmeans', 'hclust', 'scCoGAPS',
'DR.SC', 'BayesSpace', 'SpaGCN', 'STAGATE')
)
all_res <- rbind(all_res, res_tmp)
}
write.csv(all_res, file = paste0(output_dir, '/all_res.csv'))
Plot boxplot of ARI for all celltypes
all_res$method <- factor(all_res$method, levels = c('scSpace', 'louvain', 'kmeans', 'STAGATE', 'BayesSpace', 'DR.SC', 'scCoGAPS', 'SpaGCN', 'hclust'))
p <- ggplot(all_res, aes(method, ARI, fill = method)) +
geom_boxplot() +
scale_fill_manual(values = c('#984EA3', '#4DAF4A', '#377EB8', '#FFFF33','#A65628',
'#F781BF', '#10B981', '#999999', '#E41A1C')) +
theme_classic() +
geom_signif(
comparisons = list(c('scSpace', 'louvain'),c('scSpace', 'kmeans'), c('scSpace', 'STAGATE'),
c('scSpace', 'BayesSpace'), c('scSpace', 'DR.SC'),c('scSpace', 'scCoGAPS'),
c('scSpace', 'SpaGCN'), c('scSpace', 'hclust')),
test = 'wilcox.test',
step_increase = 0.1) +
theme(axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))
p

Calculate ARI of all celltypes
sub_res <- data.frame()
for (i in 1:140) {
if(i %in% c(1, 4, 7, 11, 14, 17, 21, 24, 27, 31, 34, 37, 41, 44, 47)){
subcluster_num <- 2
} else if(i %in% c(2, 5, 8, 9, 12, 15, 18, 19, 22, 25, 28, 29, 32, 35, 38, 39, 42, 45, 48, 49)){
subcluster_num <- 3
} else if(i %in% c(3, 6, 10, 13, 16, 20, 23, 26, 30, 33, 36, 40, 43, 46, 50)){
subcluster_num <- 4
} else if(i %in% c(51:65)){
subcluster_num <- 5
} else if(i %in% c(66:80)){
subcluster_num <- 6
} else if(i %in% c(81:95)){
subcluster_num <- 7
} else if(i %in% c(96:110)){
subcluster_num <- 8
} else if(i %in% c(111:125)){
subcluster_num <- 9
} else if(i %in% c(126:140)){
subcluster_num <- 10
}
cluster_num <- length(unique(sim_gt[[i]]$Group))
subcluster_name <- paste0('Group', (1 + cluster_num - subcluster_num):cluster_num)
scspace_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_scspace[[i]][sim_scspace[[i]]$Group %in% subcluster_name, ]$scSpace)
louvain_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_louvain[[i]][sim_louvain[[i]]$Group %in% subcluster_name, ]$louvain)
kmeans_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_kmeans[[i]][sim_kmeans[[i]]$Group %in% subcluster_name, ]$kmeans)
hclust_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_hclust[[i]][sim_hclust[[i]]$Group %in% subcluster_name, ]$hclust)
sccogaps_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_sccogaps[[i]][sim_sccogaps[[i]]$Group %in% subcluster_name, ]$scCoGAPS)
drsc_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_drsc[[i]][sim_drsc[[i]]$Group %in% subcluster_name, ]$spatial.drsc.cluster)
bayesspace_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_bayesspace[[i]][sim_bayesspace[[i]]$Group %in% subcluster_name, ]$spatial.cluster)
spagcn_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_spagcn[[i]][sim_spagcn[[i]]$Group %in% subcluster_name, ]$SpaGCN)
stagate_tmp <- adjustedRandIndex(sim_gt[[i]][sim_gt[[i]]$Group %in% subcluster_name, ]$Group, sim_stagate[[i]][sim_stagate[[i]]$Group %in% subcluster_name, ]$STAGATE)
res_tmp <- data.frame(
simulation = names(sim_gt)[i],
subcluster_num = subcluster_num,
ARI = c(scspace_tmp, louvain_tmp, kmeans_tmp, hclust_tmp, sccogaps_tmp,
drsc_tmp, bayesspace_tmp, spagcn_tmp, stagate_tmp),
method = c('scSpace', 'louvain', 'kmeans', 'hclust', 'scCoGAPS',
'DR.SC', 'BayesSpace', 'SpaGCN', 'STAGATE')
)
sub_res <- rbind(sub_res, res_tmp)
}
write.csv(sub_res, file = paste0(output_dir, '/sub_res.csv'))
Plot boxplot of ARI for spatial subclusters
sub_res$method <- factor(sub_res$method, levels = c('scSpace', 'louvain', 'kmeans', 'STAGATE', 'BayesSpace', 'DR.SC', 'scCoGAPS', 'SpaGCN', 'hclust'))
p <- ggplot(sub_res, aes(as.factor(subcluster_num), ARI, fill = method)) +
geom_boxplot() +
scale_fill_manual(values = c('#984EA3', '#4DAF4A', '#377EB8', '#FFFF33','#A65628',
'#F781BF', '#10B981', '#999999', '#E41A1C')) +
theme_classic()
p
