Unsupervised clustering on the Slide-seq cerebellum

library(RCTD)
library(Matrix)
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(reshape2)
library(readr)
library(Seurat)

Find Marker Genes

get_marker_data <- function(cell_type_names, cell_type_means, gene_list) {
  marker_means = cell_type_means[gene_list,]
  marker_norm = marker_means / rowSums(marker_means)
  marker_data = as.data.frame(cell_type_names[max.col(marker_means)])
  marker_data$max_epr <- apply(cell_type_means[gene_list,],1,max)
  colnames(marker_data) = c("cell_type",'max_epr')
  rownames(marker_data) = gene_list
  marker_data$log_fc <- 0
  epsilon <- 1e-9
  for(cell_type in unique(marker_data$cell_type)) {
    cur_genes <- gene_list[marker_data$cell_type == cell_type]
    other_mean = rowMeans(cell_type_means[cur_genes,cell_type_names != cell_type])
    marker_data$log_fc[marker_data$cell_type == cell_type] <- log(epsilon + cell_type_means[cur_genes,cell_type]) - log(epsilon + other_mean)
  }
  return(marker_data)
}

myRCTD <- readRDS('../../myRCTD_cer.rds')
cur_cell_types <- c("Bergmann","Granule","Purkinje","MLI1","Oligodendrocytes")
puck <- myRCTD@spatialRNA
cell_type_info_restr = myRCTD@cell_type_info$info
cell_type_info_restr[[1]] = cell_type_info_restr[[1]][,cur_cell_types]
cell_type_info_restr[[2]] = cur_cell_types; cell_type_info_restr[[3]] = length(cur_cell_types)

de_genes <- get_de_genes(cell_type_info_restr, puck, fc_thresh = 3, expr_thresh = .0001, MIN_OBS = 3)
marker_data_de = get_marker_data(cell_type_info_restr[[2]], cell_type_info_restr[[1]], de_genes)
saveRDS(marker_data_de, '../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_standard.RDS')

Unsupervised Clustering

#given a puck object, returns a puck with counts filtered based on UMI threshold and gene list
restrict_counts <- function(puck, gene_list, UMI_thresh = 1, UMI_max = 20000) {
  keep_loc = (puck@nUMI >= UMI_thresh) & (puck@nUMI <= UMI_max)
  puck@counts = puck@counts[gene_list,keep_loc]
  if(length(puck@cell_labels) > 0) #check cell_labels non null
    puck@cell_labels = puck@cell_labels[keep_loc]
  puck@nUMI = puck@nUMI[keep_loc]
  return(puck)
}
config_data <- config::get(file = "../../conf/dataset.yml", use_parent = FALSE)
config <- config::get(file = paste0("../../conf/",config_data$config_mode,".yml"), use_parent = FALSE)
slideseqdir <- file.path("../../data/SpatialRNA",'NewCerPuck_190926_08')
resultsdir = file.path(slideseqdir,"results")
puck = readRDS(file.path(slideseqdir, config_data$puckrds))
puck <- restrict_counts(puck, rownames(puck@counts), UMI_thresh = 100)

target <- CreateSeuratObject(puck@counts, project = "Slideseq",assay = "RNA",min.cells = 0,min.features = 0,names.field = 1,names.delim = "_",meta.data = NULL)
target <- NormalizeData(target, verbose = FALSE)
target <- FindVariableFeatures(target, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
resultsdir = "Data/Slideseq/NewCerPuck_190926_08/SeuratResults/"
target <- ScaleData(target)
target <- RunPCA(target, assay = "RNA", verbose = FALSE)
target <- RunUMAP(target, dims = 1:30)
target <- FindNeighbors(target, dims = 1:30)
target <- FindClusters(target, resolution = 0.3, verbose = FALSE)
puck@cell_labels <- target@active.ident
n_clusters = length(levels(puck@cell_labels))

puck_seurat <- puck

Plot Bergmann and Purkinje clusters in marker gene space

puck <- readRDS('../Revisions/puck_seurat.rds')
marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_standard.RDS')
berg_genes <- rownames(marker_data_de)[marker_data_de$cell_type == "Bergmann"]
purk_genes <- rownames(marker_data_de)[marker_data_de$cell_type == "Purkinje"]
my_ind = puck@cell_labels==5
plot_df <- data.frame(colSums(puck@counts[berg_genes, my_ind]) / puck@nUMI[my_ind], colSums(puck@counts[purk_genes, my_ind]) / puck@nUMI[my_ind])
colnames(plot_df) = c('Bergmann','Purkinje')
plot_df$type = "Purkinje"
plot_df_final <- plot_df[puck@nUMI[my_ind] >= 300,]
my_ind = puck@cell_labels==2
plot_df <- data.frame(colSums(puck@counts[berg_genes, my_ind]) / puck@nUMI[my_ind], colSums(puck@counts[purk_genes, my_ind]) / puck@nUMI[my_ind])
colnames(plot_df) = c('Bergmann','Purkinje')
plot_df$type = "Bergmann"
plot_df_final <- rbind(plot_df_final, plot_df[puck@nUMI[my_ind] >= 300,])
MULT <- 500
plot_df_final$Bergmann <- MULT * plot_df_final$Bergmann
plot_df_final$Purkinje <- MULT * plot_df_final$Purkinje
my_pal = pals::coolwarm(20)
p1 <- ggplot2::ggplot(plot_df_final) + 
  ggplot2::geom_point(ggplot2::aes(x=Bergmann,y=Purkinje, color = type),alpha = 0.2,size=1) + 
  ggplot2::coord_fixed() + ggplot2::theme_classic()  +
  labs(color="Cluster Cell Type") + guides(color = guide_legend(override.aes = list(size = 3,alpha=1))) + 
  scale_color_manual(values=c(my_pal[1], my_pal[20]))+ theme(legend.position="top") + xlab('Bergmann Markers') + ylab('Purkinje Markers') + scale_x_continuous(breaks = c(0,30,60), limits = c(0,60)) + scale_y_continuous(breaks = c(0,20,40), limits = c(0,40))
ggarrange(p1)

Plot granule clusters in space and compare to marker genes

marker_data_de <- readRDS('../../Data/SpatialRNA/NewCerPuck_190926_08/results/marker_data_de_standard.RDS')
my_mod <- function(p) {
  p + scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ theme(legend.position="top") + geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())
}


my_pal = pals::brewer.blues(20)[2:20]
p2 <- plot_puck_continuous(puck, names(puck@cell_labels[puck@cell_labels == 0 | puck@cell_labels == 1]),  factor(puck@nUMI/puck@nUMI), ylimit = c(0,1))
p2 <- my_mod(p2) + scale_color_manual("",values=c(my_pal[19]),labels = c("Granule Clusters"))  + guides(color = guide_legend(override.aes = list(size = 3,alpha=1)))

cell_type = "Granule"
cur_range <- c(0,15)
MULT = 500
gran_genes <- rownames(marker_data_de[marker_data_de$cell_type == cell_type,])
p1 <- plot_puck_continuous(puck,colnames(puck@counts)[puck@nUMI >= 200],MULT*colSums(puck@counts[gran_genes,])/puck@nUMI, ylimit = cur_range)
p1 <- my_mod(p1) +ggplot2::scale_colour_gradientn(paste(cell_type,"Markers"), colors = my_pal, limits = cur_range, breaks = cur_range)
 
ggarrange(p1,p2)

#new plot
puck <- restrict_puck(puck, names(puck@cell_labels))
barcodes = names(puck@cell_labels)#puck@cell_labels %in% c(0,1)
my_table = puck@coords[barcodes,]
my_pal = c("#0072B2", "#D55E00")
names(my_pal) <- c("Correct", "Incorrect")
expr_level <- MULT*colSums(puck@counts[gran_genes,barcodes]) / puck@nUMI[barcodes]
my_table$class <- names(my_pal)[as.integer(expr_level < 5) + 1]
library(rdist)
dist_mat <- rdist(my_table[,c('x','y')])
dmat <- as.matrix(dist_mat)
proximate <- dmat < 40/0.65
results <- lapply(1:dim(dmat)[1], function(i) sum(my_table$class[which(proximate[i,])] == "Correct") > 5)
my_table$class_val <- names(my_pal)[2 - as.integer(unlist(results))]
my_table_plot <- my_table[puck@cell_labels %in% c(0,1),]
p1 <- ggplot2::ggplot(my_table_plot, ggplot2::aes(x=x, y=y)) + ggplot2::geom_point(ggplot2::aes(size = .1, shape=19,color=class_val)) + ggplot2::scale_color_manual("",values = my_pal)+ ggplot2::scale_shape_identity() + ggplot2::theme_classic() + ggplot2::scale_size_identity() + coord_fixed() + theme(legend.position="top")+ guides(colour = guide_legend(override.aes = list(size=2)))+ 
  scale_x_continuous(breaks = c(1000,3000,5000), limits = c(900,5600)) + scale_y_continuous(breaks = c(1000,3000,5000), limits = c(1000,4900))+ geom_segment(aes(x = 1300, y = 1700, xend = 1684.6, yend = 1700), color = "black")+  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(), axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())

my_table_plot$expr <- expr_level[rownames(my_table_plot)]
p3 <- ggplot(my_table_plot, aes(class_val, expr)) + geom_boxplot() #granule marker correct incorrect
p1