# Load in spatialRNA data and Reference data
library(spacexr)
library(Matrix)
library(devtools)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(dplyr)
library(ggrepel)
library(fields)
library(stringr)
load_all()
pwd = getwd()
datadir <- paste0('~/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/DEGLAM/data/moffitt','/')
resultsdir <- paste0('../../data/SpatialRNA/VisiumLN')
dir.exists(resultsdir)
myRCTD = readRDS(file.path(resultsdir,'RCTD_object_with_CSIDE_single_B_density_weight_threshold_0.8_binary_cutoff_0.339.rds'))
cell_types_present <- myRCTD@internal_vars_de$cell_types_present
cell_types <- myRCTD@internal_vars_de$cell_types
gene_fits <- myRCTD@de_results$gene_fits
gene <- 'CXCL13'
my_barc <- myRCTD@internal_vars_de$barcodes
p1 <- plot_class(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , factor(myRCTD@internal_vars_de$X2[,2]),
title ='') + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ scale_color_manual("",values = c("#0072B2", "#D55E00"), labels = c('B Cell Poor','B Cell Rich'))
#scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Distance from midline", labels = c(0,1),breaks = c(0,1), limits = c(0,1))
p2 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , ylimit = c(0,10), myRCTD@spatialRNA@counts[gene,my_barc],title ='', size = 0.5) + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "CXCL13 Expression", labels = c(0,10),breaks = c(0,10), limits = c(0,10))
ggarrange(p1,p2,nrow = 2)
### Plot DC Cell Type
dc_prop <- myRCTD@internal_vars_de$my_beta[,'DC']
p2 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , ylimit = c(0,.15), dc_prop[my_barc],title ='', size = .5) + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Dentritic Cell Proportion", labels = c(0,.15),breaks = c(0,.15), limits = c(0,.15))
p2
barcodes <- myRCTD@internal_vars_de$barcodes
gene <- 'CXCL13'
Y <- myRCTD@spatialRNA@counts[gene, barcodes]
Yn <- Y / myRCTD@spatialRNA@nUMI[barcodes]
dc_prop <- myRCTD@internal_vars_de$my_beta[,'DC']
thresh <- median(dc_prop)
reg <- myRCTD@internal_vars_de$X2[barcodes,2]
pred <- predict_CSIDE_all(myRCTD, gene)
n_box <- 10
MULT <- 500
gex <- matrix(0, n_box, 4)
pex <- matrix(0, n_box, 4)
stex <- matrix(0, n_box, 4)
for(i in 1:n_box) {
tl <- quantile(dc_prop, (i-1)/n_box)
th <- quantile(dc_prop, (i)/n_box)
gex[i,2] <- (mean(Yn[reg & dc_prop > tl & dc_prop < th]))
gex[i,1] <- (mean(Yn[!reg & dc_prop > tl & dc_prop < th]))
gex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
gex[i,4] <- i
pex[i,2] <- (mean(pred[reg & dc_prop > tl & dc_prop < th]))
pex[i,1] <- (mean(pred[!reg & dc_prop > tl & dc_prop < th]))
pex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
pex[i,4] <- i
stex[i,2] <- (sd(Yn[reg & dc_prop > tl & dc_prop < th])/sqrt(length(Yn[reg & dc_prop > tl & dc_prop < th])))
stex[i,1] <- (sd(Yn[!reg & dc_prop > tl & dc_prop < th])/sqrt(length(Yn[!reg & dc_prop > tl & dc_prop < th])))
stex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
stex[i,4] <- i
#print(length(Yn[reg & dc_prop > tl & dc_prop < th]))
#print(length(Yn[!reg & dc_prop > tl & dc_prop < th]))
}
colnames(gex) <- c('out', 'inn', 'dc', 'ind')
plot_df <- melt(data.frame(gex), id.vars = c('dc','ind'))
colnames(plot_df) <- c('dc','q', 'r', 'y')
plot_df$r <- factor(plot_df$r)
colnames(pex) <- c('out', 'inn', 'dc', 'ind')
plot_df2 <- melt(data.frame(pex), id.vars = c('dc','ind'))
colnames(plot_df2) <- c('dc','q', 'r', 'y')
plot_df2$r <- factor(plot_df2$r)
colnames(stex) <- c('out', 'inn', 'dc', 'ind')
plot_df3 <- melt(data.frame(stex), id.vars = c('dc','ind'))
colnames(plot_df3) <- c('dc','q', 'r', 'y')
plot_df3$r <- factor(plot_df3$r)
plot_df$se <- plot_df3$y
p3 <- ggplot(plot_df, aes(x=dc,y=y*MULT,color=r)) + geom_point() + geom_errorbar(aes(ymin = (y - se*1.96)*MULT, ymax = (y+se*1.96)*MULT))+
theme_classic() + ylim(c(0,.0008*MULT)) + xlab('Dentritic Cell Proportion') + ylab('CXCL13 Expression') +
geom_line(data = plot_df2) + xlim(c(0,.16)) +
scale_color_manual("", values = c("#D55E00", "#0072B2"), breaks = c('inn','out'), labels = c('B Cells Near','B Cells Far'))
p3
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr["T_CD4"] <- "#56B4E9"
my_pal_curr["B"] <- "#0072B2"
my_pal_curr["DC"] <- "#000000"
my_pal_curr["Macrophages"] <- "#CC79A7"
plot_df_list <- list()
for(cell_type in cell_types[c(1,3,5,6)]) {
de_df <- myRCTD@de_results$all_gene_list[[cell_type]]
gene_big <- rownames(de_df)
cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present]
cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/')
p_vals <- de_df$p_val
names(p_vals) <- gene_big
plot_df <- data.frame(gene_big, cell_type, de_df$log_fc, -log(pmax(p_vals,1e-16),10), gene_big %in% rownames(myRCTD@de_results$sig_gene_list[[cell_type]]))
colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig')
plot_df_list[[cell_type]] <- plot_df
}
plot_df <- bind_rows(plot_df_list)
plot_df$label <- NA
plot_df[plot_df$ct == 'B' & plot_df$gene %in% c('RGS13','STMN1','RASGRP2'), 'label'] <-
plot_df[plot_df$ct == 'B' & plot_df$gene %in% c('RGS13','STMN1','RASGRP2'), 'gene']
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'label'] <-
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'gene']
plot_df[plot_df$ct == 'T_CD4' & plot_df$gene %in% c('RILPL2'), 'label'] <-
plot_df[plot_df$ct == 'T_CD4' & plot_df$gene %in% c('RILPL2'), 'gene']
plot_df[plot_df$ct == 'Macrophages' & plot_df$gene %in% c('SPARC'), 'label'] <-
plot_df[plot_df$ct == 'Macrophages' & plot_df$gene %in% c('SPARC'), 'gene']
#plot_df$label <- plot_df$gene
#plot_df$label[!plot_df$sig] <- NA
#clutter_factor <- 1.2
#plot_df$label[plot_df$ct %in% c('CA1', 'CA3')][round((1:floor(length(plot_df$ct %in% c('CA1', 'CA3'))/clutter_factor))*clutter_factor)] <- NA # reduce clutter
if(length(grep("mt-",plot_df$gene)))
plot_df <- plot_df[plot_df$gene[-grep("mt-",plot_df$gene)],]
plot_df$mean <- as.numeric(plot_df$mean)
plot_df$y <- as.numeric(plot_df$y)
plot_df$mean <- pmin(pmax(plot_df$mean,-3/log(exp(1),2)),3/log(exp(1),2))
p <- ggplot(plot_df, aes(x=mean*log(exp(1),2), y = y, color = ct, alpha = sig)) + geom_point() + theme_classic() +
geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') +
geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = F, max.overlaps = 20, label.padding = 0.1) + labs(color = 'Cell Type') + xlab('Estimated cell type-specific DE by CSIDE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") ) + ggplot2::scale_color_manual("Cell type",values = my_pal_curr[c('B','DC','T_CD4','Macrophages')], breaks = c('B','DC','T_CD4','Macrophages'), labels = c('B','DC','T_CD4','Macrophages')) + scale_alpha_manual("", labels = c('Not significant', 'Significant'), values = c(0.2,1))
p
### Volcano plot only for Dendritic Cells:
n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr["T_CD4"] <- "#56B4E9"
my_pal_curr["B"] <- "#0072B2"
my_pal_curr["DC"] <- "#000000"
my_pal_curr["Macrophages"] <- "#CC79A7"
plot_df_list <- list()
for(cell_type in cell_types[c(3)]) {
de_df <- myRCTD@de_results$all_gene_list[[cell_type]]
gene_big <- rownames(de_df)
cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present]
cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/')
p_vals <- de_df$p_val
names(p_vals) <- gene_big
plot_df <- data.frame(gene_big, cell_type, de_df$log_fc, -log(pmax(p_vals,1e-16),10), gene_big %in% rownames(myRCTD@de_results$sig_gene_list[[cell_type]]))
colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig')
plot_df_list[[cell_type]] <- plot_df
}
plot_df <- bind_rows(plot_df_list)
plot_df$label <- NA
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'label'] <-
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'gene']
#plot_df$label <- plot_df$gene
#plot_df$label[!plot_df$sig] <- NA
#clutter_factor <- 1.2
#plot_df$label[plot_df$ct %in% c('CA1', 'CA3')][round((1:floor(length(plot_df$ct %in% c('CA1', 'CA3'))/clutter_factor))*clutter_factor)] <- NA # reduce clutter
if(length(grep("mt-",plot_df$gene)))
plot_df <- plot_df[plot_df$gene[-grep("mt-",plot_df$gene)],]
plot_df$mean <- as.numeric(plot_df$mean)
plot_df$y <- as.numeric(plot_df$y)
plot_df$mean <- pmin(pmax(plot_df$mean,-3/log(exp(1),2)),3/log(exp(1),2))
p <- ggplot(plot_df, aes(x=mean*log(exp(1),2), y = y, color = ct, alpha = sig)) + geom_point() + theme_classic() +
geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') +
geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = F, max.overlaps = 20, label.padding = 0.1) + labs(color = 'Cell Type') + xlab('Estimated dendritic cell DE by CSIDE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") ) + ggplot2::scale_color_manual("Cell type",values = my_pal_curr[c('B','DC','T_CD4','Macrophages')], breaks = c('B','DC','T_CD4','Macrophages'), labels = c('B','DC','T_CD4','Macrophages')) + scale_alpha_manual("", labels = c('Not significant', 'Significant'), values = c(0.2,1))
p