Figure 2: Characterization of spatial variation and patterning

In this document, we reproduce panels from Figure 2 of our cerebellar cortex transcriptomic atlas manuscript.

Load utility functions and data

library(Seurat)
# v2.3.4
library(ggplot2)

# Utility functions
source('../src/fig2_utils.R')

We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data directory of this repo.

# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')

row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb

# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)

Alternatively, we can download a Seurat object with UMAP representation stored directly from the Single Cell Portal study. Download the cb_annotated_object.RDS object from the Download tab of the study and copy into the data directory. (Note that you will have to sign into SCP in order to access the Download tab.)

# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')

We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex. (Note that the SCP object also contains these annotations.)

# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata

# also read in metadata corresponding to object with granule cells downsampled to 60k
cluster_metadata_ds = read.csv('../data/cluster_metadata_ds.csv', header = T, row.names = 1,
                               stringsAsFactors = F)

# split up by analyses 
cluster_metadata_ds[['analysis_cluster']] = sapply(cluster_metadata_ds$cluster, function(x) {
  if (x %in% c('OPC', 'ODC')) {
    return('OPC/ODC')
  } else if (x %in% c('PLI', 'MLI1', 'MLI2')) {
    return('MLI/PLI')
  } else if (x %in% c('Macrophages', 'Microglia')){
    return('Microglia')
  } else if (x %in% c('Choroid', 'Ependymal')) {
    return('Choroid')
  } else if (x %in% c('Dcn_Fibroblasts', 'Endothelial_mural', 'Endothelial_stalk')) {
    return('Endothelial/Fibroblasts')
  } else {
    return(x)
  }
})

cluster_metadata_ds[['lob_comp_cluster']] = sapply(cluster_metadata_ds$subcluster, function(x) {
  if (x %in% c('PLI_1', 'PLI_2', 'PLI_3')) {
    return('PLI')
  } else {
    return(x)
  }
})
cluster_metadata_ds$lob_comp_cluster = factor(cluster_metadata_ds$lob_comp_cluster)
cluster_metadata_ds$analysis_cluster = factor(cluster_metadata_ds$analysis_cluster)

# Prepare cerebellar cartoon polygons for plotting later panels
# Lobule coordinates available in data directory
coords_full = c('coordsI_half.csv',  'coordsII_half.csv', 'coordsIII_half.csv', 
                'coordscul_half.csv', 'coordsVI_half.csv', 'coordsVII_half.csv',
                'coordsVIII_half.csv', 'coordsIX_half.csv', 'coordsX_half.csv',
                'coordsan12.csv', 'coordsan22.csv', 'coordsprm2.csv', 
                'coordssim2.csv', 'coordscop2.csv', 'coordsf2.csv', 
                'coordspf2.csv')

coords_full = paste0('../data/coords/', coords_full)

coords_names = c('I', 'II', 'III', 'CUL', 'VI', 'VII', 'VIII', 'IX', 'X',
                 'AN1', 'AN2', 'PRM', 'SIM', 'COP', 'F', 'PF')

SPs = generate_SPs(coords_full, coords_names)

Panel A

Spatial divergence computation and plotting.

celltypes_test = c('Purkinje', 'Granule', 'OPC/ODC', 'Golgi', 'UBC',
                   'MLI/PLI', 'Bergmann', 'Astrocyte' )

for (celltype in celltypes_test) {
  celltype_sub = cluster_metadata_ds[cluster_metadata_ds$analysis_cluster == celltype,]
  celltype_sub$lob_comp_cluster = droplevels(celltype_sub$lob_comp_cluster)
  celltype_df = getScatterPlotData_meta(celltype_sub, celltype = celltype)
  
  if (exists('full_scatter_df')) {
    full_scatter_df = rbind(full_scatter_df, celltype_df)
  } else {
    full_scatter_df = celltype_df
  }
}

full_scatter_df[['log_fc']] = log(full_scatter_df$fc_values, base = 2)
full_scatter_df[['log_p_values']] = -1 * log(full_scatter_df$p_values + 1e-250, base = 10)
full_scatter_df[['log_q_values']] = -1 * log(full_scatter_df$q_values + 1e-250, base = 10)

ggplot(data = full_scatter_df, aes(x = log_q_values, y = log_fc)) + 
  geom_point(data = full_scatter_df, aes(color = celltype), size = 2) +
  xlab('-log10(q-value)') +
  ylab('log(Max Region FC)') + theme(legend.title = element_blank()) +
  guides(colour = guide_legend(override.aes = list(size=4))) + 
  geom_vline(xintercept = 3, linetype = 'dashed', col = 'grey')

Panel B (Purkinje cells)

# Subset to purkinjes
full_cb = SetIdent(full_cb, ident.use = full_cb@meta.data$subcluster)
purk_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Purkinje', 
                    subset.raw = T)

#  remember to normalize data
purk_s = NormalizeData(purk_s)
genes_purkinje = rev(c("Aldoc",'Kctd16', 'Gpr176', 'Drd3', "Ephb2","Tshz2","Fam19a2",
                       "Alcam","Tox2","2900055J20Rik","Vmn2r30"))

# pdf("Figure2_panelB.pdf", useDingbats = F, width = 5, height = 3.5)
# use slightly customized dotplot function
DotPlot2(purk_s, genes_purkinje,
         scale.by = 'radius',dot.min = 0.2, dot.scale = 6,
         scale.min = 0.1,plot.legend = T, x.lab.rot = 45)

# dev.off()

Panel C (Aldoc+ and - distributions)

# split into aldoc+ and aldoc-
purk_s@meta.data[['Aldoc_stat']] = sapply(as.character(purk_s@ident), function(x) {
  split = strsplit(x, '_')[[1]]
  if (split[2] == 'Anti') {
    return('Aldoc_neg')
  } else {
    return('Aldoc_pos')
  }
})

purk_s@meta.data$Aldoc_stat = factor(purk_s@meta.data$Aldoc_stat)
# plot spatial distributions
# pdf('Figure2_PanelC_left.pdf', useDingbats = F)
spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'Aldoc_stat', sps = SPs, 
                            cluster = 'Aldoc_pos', do.print = F, color_divergent = T)

# dev.off()

# pdf('Figure2_PanelC_right.pdf', useDingbats = F)
spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'Aldoc_stat', sps = SPs,
                            cluster = 'Aldoc_neg', do.print = F, color_divergent = T)

# dev.off()

Panel D (Purkinje subcluster distributions)

# plot spatial distributions of subclusters
# pdf('Figure2_panelC.pdf', useDingbats = F)
# spatial distributions for all purkinje clusters 
for (clust in levels(purk_s@ident)) {
  print(spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'subcluster', sps = SPs,
                                    cluster = clust, do.print = F, color_divergent = T))
}

# dev.off()

Panel E (Granule cells)

# clean up
rm(purk_s)
gc()

gran_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Granule',
                    subset.raw = T)

gran_s = NormalizeData(gran_s)
genes_gran = c("Chrm3","Nr4a2","Galntl6", 'Ebf1', "Gprin3", "Rasgrf1")

# pdf("Figure2_panelE.pdf",useDingbats = F, width = 5, height = 3.5)
DotPlot2(gran_s, genes_gran, dot.scale = 10,
         scale.min = 0.1, plot.legend = T,
         x.lab.rot = 45)

# dev.off()

Panel F (Granule subcluster distributions)

# plot spatial distributions of subclusters

# pdf('Figure2_panelF.pdf', useDingbats = F)
# spatial distributions for all granule clusters 
for (clust_i in 1:3) {
  clust = levels(gran_s@ident)[clust_i]
  print(spatial_enrichment_map_meta(gran_s@meta.data, meta_col = 'subcluster', sps = SPs,
                                    cluster = clust, do.print = F, color_divergent = T))
}

# dev.off()

Panel G (Bergmann glia)

# clean up
rm(gran_s)
gc()

bergmann_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Bergmann',
                    subset.raw = T)

bergmann_s = NormalizeData(bergmann_s)

# pdf('Figure2_panelG', useDingbats = F, height = 3, width = 4)
DotPlot2(bergmann_s, c('Wif1', 'Mybpc1'), dot.scale = 8,
         scale.min = 0.1, plot.legend = T, scale.by = 'size',
         x.lab.rot = 45)

# dev.off()

Panel H (Bergmann glia subcluster distributions)

# plot spatial distributions of subclusters

# pdf('Figure2_panelH.pdf', useDingbats = F)
# spatial distributions for Bergmann cluster 
spatial_enrichment_map_meta(bergmann_s@meta.data, cluster = 'Bergmann_2', meta_col = 'subcluster', sps = SPs,
                       do.print = F, color_divergent = T)

# dev.off()

sessionInfo()

## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] sp_1.3-1      tidyr_0.8.3   dplyr_0.8.3   Seurat_2.3.4  Matrix_1.2-16
## [6] cowplot_0.9.4 ggplot2_3.1.1
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15          colorspace_1.4-1    class_7.3-15       
##   [4] modeltools_0.2-22   ggridges_0.5.1      mclust_5.4.3       
##   [7] htmlTable_1.13.1    base64enc_0.1-3     rstudioapi_0.10    
##  [10] proxy_0.4-23        npsurv_0.4-0        flexmix_2.3-15     
##  [13] bit64_0.9-7         mvtnorm_1.0-10      codetools_0.2-16   
##  [16] splines_3.5.3       R.methodsS3_1.7.1   lsei_1.2-0         
##  [19] robustbase_0.93-4   knitr_1.22          Formula_1.2-3      
##  [22] jsonlite_1.6        ica_1.0-2           cluster_2.0.7-1    
##  [25] kernlab_0.9-27      png_0.1-7           R.oo_1.22.0        
##  [28] compiler_3.5.3      httr_1.4.0          backports_1.1.4    
##  [31] assertthat_0.2.1    lazyeval_0.2.2      lars_1.2           
##  [34] acepack_1.4.1       htmltools_0.3.6     tools_3.5.3        
##  [37] igraph_1.2.4        gtable_0.3.0        glue_1.3.1         
##  [40] RANN_2.6.1          reshape2_1.4.3      Rcpp_1.0.3         
##  [43] trimcluster_0.1-2.1 gdata_2.18.0        ape_5.3            
##  [46] nlme_3.1-137        iterators_1.0.10    fpc_2.1-11.1       
##  [49] gbRd_0.4-11         lmtest_0.9-36       xfun_0.6           
##  [52] stringr_1.4.0       irlba_2.3.3         gtools_3.8.1       
##  [55] DEoptimR_1.0-8      MASS_7.3-51.1       zoo_1.8-5          
##  [58] scales_1.0.0        doSNOW_1.0.16       parallel_3.5.3     
##  [61] RColorBrewer_1.1-2  yaml_2.2.0          reticulate_1.11.1  
##  [64] pbapply_1.4-0       gridExtra_2.3       rpart_4.1-13       
##  [67] segmented_0.5-3.0   latticeExtra_0.6-28 stringi_1.4.3      
##  [70] foreach_1.4.4       checkmate_1.9.1     caTools_1.17.1.2   
##  [73] bibtex_0.4.2        Rdpack_0.10-1       SDMTools_1.1-221   
##  [76] rlang_0.4.1         pkgconfig_2.0.3     dtw_1.20-1         
##  [79] prabclus_2.2-7      bitops_1.0-6        evaluate_0.13      
##  [82] lattice_0.20-38     ROCR_1.0-7          purrr_0.3.3        
##  [85] labeling_0.3        htmlwidgets_1.3     bit_1.1-14         
##  [88] tidyselect_0.2.5    plyr_1.8.4          magrittr_1.5       
##  [91] R6_2.4.1            snow_0.4-3          gplots_3.0.1.1     
##  [94] Hmisc_4.2-0         pillar_1.4.2        foreign_0.8-71     
##  [97] withr_2.1.2         fitdistrplus_1.0-14 mixtools_1.1.0     
## [100] survival_2.43-3     nnet_7.3-12         tsne_0.1-3         
## [103] tibble_2.1.3        crayon_1.3.4        hdf5r_1.1.1        
## [106] KernSmooth_2.23-15  rmarkdown_1.13      grid_3.5.3         
## [109] data.table_1.12.2   metap_1.1           digest_0.6.19      
## [112] diptest_0.75-7      R.utils_2.8.0       stats4_3.5.3       
## [115] munsell_0.5.0