Figure 3: Cross-cluster continuity across neuronal populations

In this document, we reproduce panels from Figure 3 of our cerebellar cortex transcriptomic atlas manuscript.

Load utility functions and data

library(Seurat)
# v2.3.4
library(monocle)
# v2.10.1
library(ggplot2)
library(liger)

# Utility functions
source('../src/fig3_utils.R')

We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data directory of this repo.

# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')

row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb

# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)

Alternatively, we can download a Seurat object with annotation information directly from the Single Cell Portal study. Download the cb_annotated_object.RDS object from the Download tab of the study and copy into the data directory. (Note that you will have to sign into SCP in order to access the Download tab.)

# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')

We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex. (Note that the SCP object also contains these annotations.)

# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata

# also read in metadata corresponding to object with granule cells downsampled to 60k
cluster_metadata_ds = read.csv('../data/cluster_metadata_ds.csv', header = T, row.names = 1,
                               stringsAsFactors = F)

Compute cluster continuity for select populations

We can subset to appropriate clusters for comparison using initial annotated object and analyze continuity of gene expression across populations in these subobjects. This will produce similar results to those in Figure 3; to reproduce figures exactly, use indicated subobjects downloaded from SCP (place in data directory).

# Results for MLI1/MLI2
# mli_comb = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('MLI1', 'MLI2'),
#                       subset.raw = T)
# mli_comb = SetIdent(mli_comb, ident.use = mli_comb@meta.data$cluster)
mli_comb = readRDS('../data/mli_ds_seurat.RDS')

mli_returned = generateObjectContinuityFits(mli_comb, 'MLI1', 'MLI2', use.raw.order = F,
                                            reverse_order = T)

## [1] "Calculating differential expression"
## [1] "Creating monocle object"

## Removing 469 outliers

## [1] "Calculating sigmoid fits"

# Results for MLI1/Golgi
# golgi_mli = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('MLI1', 'Golgi'),
#                        subset.raw = T)
# golgi_mli = SetIdent(golgi_mli, ident.use = golgi_mli@meta.data$cluster)

golgi_mli = readRDS('../data/golgi_mli1_ds_seurat.RDS')

gmli_returned = generateObjectContinuityFits(golgi_mli, 'MLI1',  
                                             'Golgi', use.raw.order = F, reverse_order = T)

## [1] "Calculating differential expression"
## [1] "Creating monocle object"

## Removing 383 outliers

## [1] "Calculating sigmoid fits"

# Results for MLI1 subclusters
# mli1_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('MLI1'), subset.raw = T)
# mli1_s = SetIdent(mli1_s, ident.use = mli1_s@meta.data$subcluster)

mli1_s = readRDS('../data/mli1_ds_seurat.RDS')

mli1_returned = generateObjectContinuityFits(mli1_s, 'MLI1_1',  
                                             'MLI1_2', use.raw.order = F, reverse_order = T)

## [1] "Calculating differential expression"
## [1] "Creating monocle object"

## Removing 426 outliers

## [1] "Calculating sigmoid fits"

# Results for Golgi subclusters
# golgi_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('Golgi'), subset.raw = T)
# golgi_s = SetIdent(golgi_s, ident.use = golgi_s@meta.data$subcluster)

golgi_s = readRDS('../data/golgi_seurat.RDS')

golgi_returned = generateObjectContinuityFits(golgi_s, 'Golgi_1',  
                                              'Golgi_2', use.raw.order = F, reverse_order = F)

## [1] "Calculating differential expression"
## [1] "Creating monocle object"

## Removing 275 outliers

## [1] "Calculating sigmoid fits"

# Results for UBC subclusters
# ubc_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('UBC'), subset.raw = T)
# ubc_s = SetIdent(ubc_s, ident.use = ubc_s@meta.data$subcluster)

ubc_s = readRDS('../data/ubc_seurat.RDS')

ubc_returned = generateObjectContinuityFits(ubc_s, 'UBC_1',  
                                            'UBC_3', use.raw.order = F, reverse_order = F,
                                            return_monocle = T)

## [1] "Calculating differential expression"
## [1] "Creating monocle object"

## Removing 411 outliers

## [1] "Calculating sigmoid fits"

# Using top 200 genes ranked by spearman correlation
agg_results = makeCompareDF2(list(gmli_returned$fits,
                                  mli1_returned$fits, 
                                  mli_returned$fits,
                                  ubc_returned$fits,
                                  golgi_returned$fits),
                                n_cell_counts = c(5000, 5000, 5000, 1613, 3989),
                                names = c('MLI1_Golgi', 'MLI1_1_MLI1_2', 'MLI1_MLI2', 'UBC', 'Golgi'),
                                top_ranked_spearman = 200, converged_only = T, 
                                positive_v = T)

Panel A

# Plot representative genes
grm8 = plotFittedGene(mli1_returned$object, 'Grm8', factor_order = mli1_returned[[2]], 
                        fitparams = mli1_returned[[1]], return.plot = T, use_scale = T, do.legend = F)
grm8 = grm8 + scale_color_manual(values = c('#FBBBFF', '#C13EB2')) + 
  ggtitle(paste0('Grm8 (m = ', round(agg_results['Grm8', 'mid_slope'], 2), ')'))
npas3 = plotFittedGene(mli1_returned$object, 'Npas3', factor_order = mli1_returned[[2]], 
                        fitparams = mli1_returned[[1]], return.plot = T, use_scale = T, do.legend = F)

## [1] "Order was reversed as negative correlation"

npas3 = npas3 + scale_color_manual(values = c('#FBBBFF', '#C13EB2')) + 
  ggtitle(paste0('Npas3 (m = ', round(agg_results['Npas3', 'mid_slope'], 2), ')'))

ptprk = plotFittedGene(mli_returned$object, 'Ptprk', factor_order = mli_returned[[2]], 
                        fitparams = mli_returned[[1]], return.plot = T, use_scale = T, do.legend = F)

## [1] "Order was reversed as negative correlation"

ptprk = ptprk + scale_color_manual(values = c('#C13EB2', '#3EC14D')) + 
  ggtitle(paste0('Ptprk (m = ', round(agg_results['Ptprk', 'mid_slope'], 2), ')'))

nxph1 = plotFittedGene(mli_returned$object, 'Nxph1', factor_order = mli_returned[[2]], 
                        fitparams = mli_returned[[1]], return.plot = T, use_scale = T, do.legend = F)
nxph1 = nxph1 + scale_color_manual(values = c('#C13EB2', '#3EC14D')) + 
  ggtitle(paste0('Nxph1 (m = ', round(agg_results['Nxph1', 'mid_slope'], 2), ')'))


# pdf('Figure3_panelA.pdf', useDingbats = F, width = 4, height = 9)
plot_grid(grm8, ptprk, npas3, nxph1, ncol = 2, align = 'v', axis = 'l')

# dev.off()

Panel B

# pdf('Figure3_panelB.pdf', useDingbats = F,  width = 7, height = 4)
ggplot(agg_results, aes(x = log_midslope, col = dataset)) + stat_ecdf() +
  xlab('log10(m)') + ylab('Proportion of DE genes') + theme(legend.title = element_blank())

# dev.off()

Panel C

# Convert to liger object for plotting
ubc_split = SplitObject(ubc_s, attribute.1 = 'sex', do.clean = T)

ubc_l = createLiger(raw.data = list(male = ubc_split$male@raw.data,
                                     female = ubc_split$female@raw.data),
                     remove.missing = F)
ubc_l = liger::normalize(ubc_l)

# pass in tSNE coordinates and pseudotime
ubc_l@tsne.coords = as.matrix(ubc_s@dr$tsne@cell.embeddings[row.names(ubc_l@cell.data),])
ubc_l@cell.data[['pseudotime']] = ubc_returned$monocle@phenoData@data[row.names(ubc_l@cell.data), 'Pseudotime']

pseudo_plot = plotFeature(ubc_l, 'pseudotime', by.dataset = F, pt.size = 0.005 ,return.plots = T)
pseudo_plot = pseudo_plot + scale_color_gradient(low = 'grey', high = 'orangered')

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

pseudo_plot = pseudo_plot + theme(
  axis.line = element_blank(), axis.text.x = element_blank(),
  axis.text.y = element_blank(), axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.background = element_blank(), panel.border = element_blank(),
  panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
  plot.background = element_blank(), plot.title = element_blank())

ubc_plcb4 = plotGene_ordered(ubc_l, 'Plcb4', plot.by = 'none', points.only = T, do.legend = T, 
                          pt.size = 0.005, return.plots = T)

ubc_plcb1 = plotGene_ordered(ubc_l, 'Plcb1', plot.by = 'none', points.only = T, do.legend = T, pt.size = 0.005,
                          return.plots = T)
ubc_eomes = plotGene_ordered(ubc_l, 'Eomes', plot.by = 'none', points.only = T, do.legend = T, pt.size = 0.005,
                          return.plots = T)
ubc_grm1 = plotGene_ordered(ubc_l, 'Grm1', plot.by = 'none', points.only = T, do.legend = T, pt.size = 0.005,
                          return.plots = T)
ubc_calb2 = plotGene_ordered(ubc_l, 'Calb2', plot.by = 'none', points.only = T, do.legend = T, pt.size = 0.005,
                          return.plots = T)

# pdf('Figure3_panelC.pdf', useDingbats = F, width = 3.5, height = 4.5)
plot_grid(pseudo_plot, ubc_eomes, ubc_grm1, ubc_calb2, ubc_plcb4, ubc_plcb1, ncol = 2)

# dev.off()

Panel D

# Plot representative genes

# color by pseudotime
ubc_returned$object@meta.data[['pseudotime']] = ubc_returned$monocle@phenoData@data[row.names(ubc_returned$object@meta.data), 
                                                                                    'Pseudotime']

grm1 = plotFittedGene(ubc_returned$object, 'Grm1', factor_order = ubc_returned$pseudo_order, color_by_cluster = F,
                      continuous_color = 'pseudotime', use_gradient = T,
                        fitparams = ubc_returned[[1]], return.plot = T, use_scale = T, do.legend = T)

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

grm1 = grm1 +  ggtitle(paste0('Grm1 (m = ', round(agg_results['Grm11', 'mid_slope'], 2), ')'))

calb2 = plotFittedGene(ubc_returned$object, 'Calb2', factor_order = ubc_returned$pseudo_order, color_by_cluster = F,
                      continuous_color = 'pseudotime', use_gradient = T,
                        fitparams = ubc_returned[[1]], return.plot = T, use_scale = T, do.legend = T)

## [1] "Order was reversed as negative correlation"

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

calb2 = calb2 +  ggtitle(paste0('Calb2 (m = ', round(agg_results['Calb2', 'mid_slope'], 2), ')'))

plcb4 = plotFittedGene(ubc_returned$object, 'Plcb4', factor_order = ubc_returned$pseudo_order, color_by_cluster = F,
                      continuous_color = 'pseudotime', use_gradient = T,
                        fitparams = ubc_returned[[1]], return.plot = T, use_scale = T, do.legend = T)

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

plcb4 = plcb4 +  ggtitle(paste0('Plcb4 (m = ', round(agg_results['Plcb4', 'mid_slope'], 2), ')'))

plcb1 = plotFittedGene(ubc_returned$object, 'Plcb1', factor_order = ubc_returned$pseudo_order, color_by_cluster = F,
                      continuous_color = 'pseudotime', use_gradient = T,
                        fitparams = ubc_returned[[1]], return.plot = T, use_scale = T, do.legend = T)

## [1] "Order was reversed as negative correlation"

## Scale for 'colour' is already present. Adding another scale for
## 'colour', which will replace the existing scale.

plcb1 = plcb1 +  ggtitle(paste0('Plcb1 (m = ', round(agg_results['Plcb1', 'mid_slope'], 2), ')'))

# pdf('Figure3_panelD.pdf', useDingbats = F, width = 4, height = 9)
plot_grid(grm1, calb2, plcb4, plcb1, ncol = 1, align = 'v', axis = 'l')

# dev.off()

sessionInfo()

## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] splines   stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] pbapply_1.4-0       liger_0.4.2         patchwork_0.0.1    
##  [4] monocle_2.10.1      DDRTree_0.1.5       irlba_2.3.3        
##  [7] VGAM_1.1-1          Biobase_2.42.0      BiocGenerics_0.28.0
## [10] Seurat_2.3.4        Matrix_1.2-16       cowplot_0.9.4      
## [13] ggplot2_3.1.1      
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     class_7.3-15        
##   [4] modeltools_0.2-22    ggridges_0.5.1       mclust_5.4.3        
##   [7] htmlTable_1.13.1     base64enc_0.1-3      rstudioapi_0.10     
##  [10] proxy_0.4-23         npsurv_0.4-0         riverplot_0.6       
##  [13] ggrepel_0.8.0        flexmix_2.3-15       bit64_0.9-7         
##  [16] mvtnorm_1.0-10       codetools_0.2-16     R.methodsS3_1.7.1   
##  [19] docopt_0.6.1         lsei_1.2-0           robustbase_0.93-4   
##  [22] knitr_1.22           Formula_1.2-3        jsonlite_1.6        
##  [25] ica_1.0-2            cluster_2.0.7-1      kernlab_0.9-27      
##  [28] png_0.1-7            R.oo_1.22.0          pheatmap_1.0.12     
##  [31] compiler_3.5.3       httr_1.4.0           backports_1.1.4     
##  [34] assertthat_0.2.1     lazyeval_0.2.2       limma_3.38.3        
##  [37] lars_1.2             acepack_1.4.1        htmltools_0.3.6     
##  [40] tools_3.5.3          igraph_1.2.4         gtable_0.3.0        
##  [43] glue_1.3.1           RANN_2.6.1           reshape2_1.4.3      
##  [46] dplyr_0.8.3          Rcpp_1.0.3           slam_0.1-45         
##  [49] trimcluster_0.1-2.1  gdata_2.18.0         ape_5.3             
##  [52] nlme_3.1-137         iterators_1.0.10     fpc_2.1-11.1        
##  [55] gbRd_0.4-11          lmtest_0.9-36        xfun_0.6            
##  [58] stringr_1.4.0        gtools_3.8.1         DEoptimR_1.0-8      
##  [61] MASS_7.3-51.1        zoo_1.8-5            scales_1.0.0        
##  [64] doSNOW_1.0.16        RColorBrewer_1.1-2   yaml_2.2.0          
##  [67] reticulate_1.11.1    gridExtra_2.3        rpart_4.1-13        
##  [70] segmented_0.5-3.0    fastICA_1.2-2        latticeExtra_0.6-28 
##  [73] stringi_1.4.3        foreach_1.4.4        checkmate_1.9.1     
##  [76] caTools_1.17.1.2     densityClust_0.3     bibtex_0.4.2        
##  [79] matrixStats_0.54.0   Rdpack_0.10-1        SDMTools_1.1-221    
##  [82] rlang_0.4.1          pkgconfig_2.0.3      dtw_1.20-1          
##  [85] prabclus_2.2-7       bitops_1.0-6         qlcMatrix_0.9.7     
##  [88] RANN.L1_2.5.2        evaluate_0.13        lattice_0.20-38     
##  [91] ROCR_1.0-7           purrr_0.3.3          labeling_0.3        
##  [94] htmlwidgets_1.3      bit_1.1-14           tidyselect_0.2.5    
##  [97] plyr_1.8.4           magrittr_1.5         R6_2.4.1            
## [100] snow_0.4-3           gplots_3.0.1.1       Hmisc_4.2-0         
## [103] combinat_0.0-8       pillar_1.4.2         foreign_0.8-71      
## [106] withr_2.1.2          fitdistrplus_1.0-14  mixtools_1.1.0      
## [109] survival_2.43-3      nnet_7.3-12          tsne_0.1-3          
## [112] tibble_2.1.3         crayon_1.3.4         hdf5r_1.1.1         
## [115] KernSmooth_2.23-15   rmarkdown_1.13       viridis_0.5.1       
## [118] grid_3.5.3           data.table_1.12.2    FNN_1.1.3           
## [121] sparsesvd_0.1-4      HSMMSingleCell_1.2.0 metap_1.1           
## [124] digest_0.6.19        diptest_0.75-7       tidyr_0.8.3         
## [127] R.utils_2.8.0        munsell_0.5.0        viridisLite_0.3.0