CSIDE on the Visium Lymph Node

Load in CSIDE results

# Load in spatialRNA data and Reference data
library(spacexr)
library(Matrix)
library(devtools)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(dplyr)
library(ggrepel)
library(fields)
library(stringr)
load_all()
pwd = getwd()
datadir <- paste0('~/Documents/MIT/Research/Rafalab/Projects/slideseq/Cell Demixing/ContentStructure/DEGLAM/data/moffitt','/')
resultsdir <- paste0('../../data/SpatialRNA/VisiumLN')
dir.exists(resultsdir)
myRCTD = readRDS(file.path(resultsdir,'RCTD_object_with_CSIDE_single_B_density_weight_threshold_0.8_binary_cutoff_0.339.rds'))
cell_types_present <- myRCTD@internal_vars_de$cell_types_present
cell_types <- myRCTD@internal_vars_de$cell_types
gene_fits <- myRCTD@de_results$gene_fits
gene <- 'CXCL13'

Plot predictive-variable and CXCL13 Gene

my_barc <- myRCTD@internal_vars_de$barcodes
p1 <- plot_class(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , factor(myRCTD@internal_vars_de$X2[,2]), 
                     title ='') + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+ scale_color_manual("",values = c("#0072B2", "#D55E00"), labels = c('B Cell Poor','B Cell Rich'))
  #scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Distance from midline", labels = c(0,1),breaks = c(0,1), limits = c(0,1))
p2 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , ylimit = c(0,10), myRCTD@spatialRNA@counts[gene,my_barc],title ='', size = 0.5) + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "CXCL13 Expression", labels = c(0,10),breaks = c(0,10), limits = c(0,10))
ggarrange(p1,p2,nrow = 2)

### Plot DC Cell Type

dc_prop <- myRCTD@internal_vars_de$my_beta[,'DC']
p2 <- plot_puck_continuous(myRCTD@spatialRNA, colnames(myRCTD@spatialRNA@counts) , ylimit = c(0,.15), dc_prop[my_barc],title ='', size = .5) + ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+  geom_segment(aes(x = 1934.6, y = 1800, xend = 2184.6, yend = 1800), color = "black") + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())+scale_color_gradientn(colors = pals::brewer.blues(20)[2:20],name = "Dentritic Cell Proportion", labels = c(0,.15),breaks = c(0,.15), limits = c(0,.15))
p2

Quantitative expression plot

barcodes <- myRCTD@internal_vars_de$barcodes
gene <- 'CXCL13'
Y <- myRCTD@spatialRNA@counts[gene, barcodes]
Yn <- Y / myRCTD@spatialRNA@nUMI[barcodes]
dc_prop <- myRCTD@internal_vars_de$my_beta[,'DC']
thresh <- median(dc_prop)
reg <- myRCTD@internal_vars_de$X2[barcodes,2]
pred <- predict_CSIDE_all(myRCTD, gene)
n_box <- 10
MULT <- 500
gex <- matrix(0, n_box, 4)
pex <- matrix(0, n_box, 4)
stex <- matrix(0, n_box, 4)
for(i in 1:n_box) {
  tl <- quantile(dc_prop, (i-1)/n_box)
  th <- quantile(dc_prop, (i)/n_box)
  gex[i,2] <- (mean(Yn[reg & dc_prop > tl & dc_prop < th]))
  gex[i,1] <- (mean(Yn[!reg & dc_prop > tl & dc_prop < th]))
  gex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
  gex[i,4] <- i
  pex[i,2] <- (mean(pred[reg & dc_prop > tl & dc_prop < th]))
  pex[i,1] <- (mean(pred[!reg & dc_prop > tl & dc_prop < th]))
  pex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
  pex[i,4] <- i
  stex[i,2] <- (sd(Yn[reg & dc_prop > tl & dc_prop < th])/sqrt(length(Yn[reg & dc_prop > tl & dc_prop < th])))
  stex[i,1] <- (sd(Yn[!reg & dc_prop > tl & dc_prop < th])/sqrt(length(Yn[!reg & dc_prop > tl & dc_prop < th])))
  stex[i,3] <- (mean(dc_prop[dc_prop > tl & dc_prop < th]))
  stex[i,4] <- i
  #print(length(Yn[reg & dc_prop > tl & dc_prop < th]))
  #print(length(Yn[!reg & dc_prop > tl & dc_prop < th]))
}
colnames(gex) <- c('out', 'inn', 'dc', 'ind')
plot_df <- melt(data.frame(gex), id.vars = c('dc','ind'))
colnames(plot_df) <- c('dc','q', 'r', 'y')
plot_df$r <- factor(plot_df$r)
colnames(pex) <- c('out', 'inn', 'dc', 'ind')
plot_df2 <- melt(data.frame(pex), id.vars = c('dc','ind'))
colnames(plot_df2) <- c('dc','q', 'r', 'y')
plot_df2$r <- factor(plot_df2$r)
colnames(stex) <- c('out', 'inn', 'dc', 'ind')
plot_df3 <- melt(data.frame(stex), id.vars = c('dc','ind'))
colnames(plot_df3) <- c('dc','q', 'r', 'y')
plot_df3$r <- factor(plot_df3$r)
plot_df$se <- plot_df3$y
p3 <- ggplot(plot_df, aes(x=dc,y=y*MULT,color=r)) + geom_point() + geom_errorbar(aes(ymin = (y - se*1.96)*MULT, ymax = (y+se*1.96)*MULT))+
  theme_classic() + ylim(c(0,.0008*MULT)) + xlab('Dentritic Cell Proportion') + ylab('CXCL13 Expression') +
  geom_line(data = plot_df2) + xlim(c(0,.16)) +
  scale_color_manual("", values = c("#D55E00", "#0072B2"), breaks = c('inn','out'), labels = c('B Cells Near','B Cells Far'))

p3

Make volcano plot

n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr["T_CD4"] <- "#56B4E9"
my_pal_curr["B"] <- "#0072B2"
my_pal_curr["DC"] <- "#000000"
my_pal_curr["Macrophages"] <- "#CC79A7"
plot_df_list <- list()
for(cell_type in cell_types[c(1,3,5,6)]) {
  de_df <- myRCTD@de_results$all_gene_list[[cell_type]]
  gene_big <- rownames(de_df)
  cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present]
  cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/')
  p_vals <- de_df$p_val
  names(p_vals) <- gene_big
  plot_df <- data.frame(gene_big, cell_type, de_df$log_fc, -log(pmax(p_vals,1e-16),10),  gene_big %in% rownames(myRCTD@de_results$sig_gene_list[[cell_type]]))
  colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig')
  plot_df_list[[cell_type]] <- plot_df
}
plot_df <- bind_rows(plot_df_list)
plot_df$label <- NA
plot_df[plot_df$ct == 'B' & plot_df$gene %in% c('RGS13','STMN1','RASGRP2'), 'label'] <-
  plot_df[plot_df$ct == 'B' & plot_df$gene %in% c('RGS13','STMN1','RASGRP2'), 'gene']
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'label'] <-
  plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'gene']
plot_df[plot_df$ct == 'T_CD4' & plot_df$gene %in% c('RILPL2'), 'label'] <-
  plot_df[plot_df$ct == 'T_CD4' & plot_df$gene %in% c('RILPL2'), 'gene']
plot_df[plot_df$ct == 'Macrophages' & plot_df$gene %in% c('SPARC'), 'label'] <-
  plot_df[plot_df$ct == 'Macrophages' & plot_df$gene %in% c('SPARC'), 'gene']
#plot_df$label <- plot_df$gene
#plot_df$label[!plot_df$sig] <- NA
#clutter_factor <- 1.2
#plot_df$label[plot_df$ct %in% c('CA1', 'CA3')][round((1:floor(length(plot_df$ct %in% c('CA1', 'CA3'))/clutter_factor))*clutter_factor)] <- NA # reduce clutter
if(length(grep("mt-",plot_df$gene)))
  plot_df <- plot_df[plot_df$gene[-grep("mt-",plot_df$gene)],]
plot_df$mean <- as.numeric(plot_df$mean)
plot_df$y <- as.numeric(plot_df$y)
plot_df$mean <- pmin(pmax(plot_df$mean,-3/log(exp(1),2)),3/log(exp(1),2))
p <- ggplot(plot_df, aes(x=mean*log(exp(1),2), y = y, color = ct, alpha = sig)) + geom_point() + theme_classic() + 
  geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') +
  geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = F, max.overlaps = 20, label.padding = 0.1) + labs(color = 'Cell Type') + xlab('Estimated cell type-specific DE by CSIDE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") ) +  ggplot2::scale_color_manual("Cell type",values = my_pal_curr[c('B','DC','T_CD4','Macrophages')], breaks = c('B','DC','T_CD4','Macrophages'), labels = c('B','DC','T_CD4','Macrophages')) + scale_alpha_manual("", labels = c('Not significant', 'Significant'), values = c(0.2,1))
p

### Volcano plot only for Dendritic Cells:

n_levels = myRCTD@cell_type_info$info[[3]]
my_pal = pals::kelly(n_levels+1)[2:(n_levels+1)]
names(my_pal) = myRCTD@cell_type_info$info[[2]]
my_pal_curr <- my_pal
my_pal_curr["T_CD4"] <- "#56B4E9"
my_pal_curr["B"] <- "#0072B2"
my_pal_curr["DC"] <- "#000000"
my_pal_curr["Macrophages"] <- "#CC79A7"
plot_df_list <- list()
for(cell_type in cell_types[c(3)]) {
  de_df <- myRCTD@de_results$all_gene_list[[cell_type]]
  gene_big <- rownames(de_df)
  cell_type_means <- myRCTD@cell_type_info$info[[1]][gene_big,cell_types_present]
  cell_prop <- sweep(cell_type_means,1,apply(cell_type_means,1,max),'/')
  p_vals <- de_df$p_val
  names(p_vals) <- gene_big
  plot_df <- data.frame(gene_big, cell_type, de_df$log_fc, -log(pmax(p_vals,1e-16),10),  gene_big %in% rownames(myRCTD@de_results$sig_gene_list[[cell_type]]))
  colnames(plot_df) <- c('gene', 'ct', 'mean', 'y', 'sig')
  plot_df_list[[cell_type]] <- plot_df
}
plot_df <- bind_rows(plot_df_list)
plot_df$label <- NA
plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'label'] <-
  plot_df[plot_df$ct == 'DC' & plot_df$gene %in% c('CLU','FSCN1','CCL19','CXCL13','FDCSP','CR2'), 'gene']
#plot_df$label <- plot_df$gene
#plot_df$label[!plot_df$sig] <- NA
#clutter_factor <- 1.2
#plot_df$label[plot_df$ct %in% c('CA1', 'CA3')][round((1:floor(length(plot_df$ct %in% c('CA1', 'CA3'))/clutter_factor))*clutter_factor)] <- NA # reduce clutter
if(length(grep("mt-",plot_df$gene)))
  plot_df <- plot_df[plot_df$gene[-grep("mt-",plot_df$gene)],]
plot_df$mean <- as.numeric(plot_df$mean)
plot_df$y <- as.numeric(plot_df$y)
plot_df$mean <- pmin(pmax(plot_df$mean,-3/log(exp(1),2)),3/log(exp(1),2))
p <- ggplot(plot_df, aes(x=mean*log(exp(1),2), y = y, color = ct, alpha = sig)) + geom_point() + theme_classic() + 
  geom_vline(xintercept = 0.4*log(exp(1),2), linetype = 'dotted') + geom_vline(xintercept = -0.4*log(exp(1),2), linetype = 'dotted') +
  geom_label_repel(aes(label = label),nudge_x = 0.1,na.rm = TRUE, show.legend = F, max.overlaps = 20, label.padding = 0.1) + labs(color = 'Cell Type') + xlab('Estimated dendritic cell DE by CSIDE') + ylab('CSIDE p-value') + scale_y_continuous(lim = c(0,16.01), breaks = c(0,5,10,15),labels = c("10^0", "10^(-5)", "10^(-10)","10^(-15)") ) +  ggplot2::scale_color_manual("Cell type",values = my_pal_curr[c('B','DC','T_CD4','Macrophages')], breaks = c('B','DC','T_CD4','Macrophages'), labels = c('B','DC','T_CD4','Macrophages')) + scale_alpha_manual("", labels = c('Not significant', 'Significant'), values = c(0.2,1))
p