In this document, we reproduce panels from Figure 2 of our cerebellar cortex transcriptomic atlas manuscript.
library(Seurat)
# v2.3.4
library(ggplot2)
# Utility functions
source('../src/fig2_utils.R')
We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data
directory of this repo.
# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')
row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb
# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)
Alternatively, we can download a Seurat object with UMAP representation stored directly from the Single Cell Portal study. Download the cb_annotated_object.RDS
object from the Download tab of the study and copy into the data
directory. (Note that you will have to sign into SCP in order to access the Download tab.)
# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')
We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex. (Note that the SCP object also contains these annotations.)
# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata
# also read in metadata corresponding to object with granule cells downsampled to 60k
cluster_metadata_ds = read.csv('../data/cluster_metadata_ds.csv', header = T, row.names = 1,
stringsAsFactors = F)
# split up by analyses
cluster_metadata_ds[['analysis_cluster']] = sapply(cluster_metadata_ds$cluster, function(x) {
if (x %in% c('OPC', 'ODC')) {
return('OPC/ODC')
} else if (x %in% c('PLI', 'MLI1', 'MLI2')) {
return('MLI/PLI')
} else if (x %in% c('Macrophages', 'Microglia')){
return('Microglia')
} else if (x %in% c('Choroid', 'Ependymal')) {
return('Choroid')
} else if (x %in% c('Dcn_Fibroblasts', 'Endothelial_mural', 'Endothelial_stalk')) {
return('Endothelial/Fibroblasts')
} else {
return(x)
}
})
cluster_metadata_ds[['lob_comp_cluster']] = sapply(cluster_metadata_ds$subcluster, function(x) {
if (x %in% c('PLI_1', 'PLI_2', 'PLI_3')) {
return('PLI')
} else {
return(x)
}
})
cluster_metadata_ds$lob_comp_cluster = factor(cluster_metadata_ds$lob_comp_cluster)
cluster_metadata_ds$analysis_cluster = factor(cluster_metadata_ds$analysis_cluster)
# Prepare cerebellar cartoon polygons for plotting later panels
# Lobule coordinates available in data directory
coords_full = c('coordsI_half.csv', 'coordsII_half.csv', 'coordsIII_half.csv',
'coordscul_half.csv', 'coordsVI_half.csv', 'coordsVII_half.csv',
'coordsVIII_half.csv', 'coordsIX_half.csv', 'coordsX_half.csv',
'coordsan12.csv', 'coordsan22.csv', 'coordsprm2.csv',
'coordssim2.csv', 'coordscop2.csv', 'coordsf2.csv',
'coordspf2.csv')
coords_full = paste0('../data/coords/', coords_full)
coords_names = c('I', 'II', 'III', 'CUL', 'VI', 'VII', 'VIII', 'IX', 'X',
'AN1', 'AN2', 'PRM', 'SIM', 'COP', 'F', 'PF')
SPs = generate_SPs(coords_full, coords_names)
Spatial divergence computation and plotting.
celltypes_test = c('Purkinje', 'Granule', 'OPC/ODC', 'Golgi', 'UBC',
'MLI/PLI', 'Bergmann', 'Astrocyte' )
for (celltype in celltypes_test) {
celltype_sub = cluster_metadata_ds[cluster_metadata_ds$analysis_cluster == celltype,]
celltype_sub$lob_comp_cluster = droplevels(celltype_sub$lob_comp_cluster)
celltype_df = getScatterPlotData_meta(celltype_sub, celltype = celltype)
if (exists('full_scatter_df')) {
full_scatter_df = rbind(full_scatter_df, celltype_df)
} else {
full_scatter_df = celltype_df
}
}
full_scatter_df[['log_fc']] = log(full_scatter_df$fc_values, base = 2)
full_scatter_df[['log_p_values']] = -1 * log(full_scatter_df$p_values + 1e-250, base = 10)
full_scatter_df[['log_q_values']] = -1 * log(full_scatter_df$q_values + 1e-250, base = 10)
ggplot(data = full_scatter_df, aes(x = log_q_values, y = log_fc)) +
geom_point(data = full_scatter_df, aes(color = celltype), size = 2) +
xlab('-log10(q-value)') +
ylab('log(Max Region FC)') + theme(legend.title = element_blank()) +
guides(colour = guide_legend(override.aes = list(size=4))) +
geom_vline(xintercept = 3, linetype = 'dashed', col = 'grey')
# Subset to purkinjes
full_cb = SetIdent(full_cb, ident.use = full_cb@meta.data$subcluster)
purk_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Purkinje',
subset.raw = T)
# remember to normalize data
purk_s = NormalizeData(purk_s)
genes_purkinje = rev(c("Aldoc",'Kctd16', 'Gpr176', 'Drd3', "Ephb2","Tshz2","Fam19a2",
"Alcam","Tox2","2900055J20Rik","Vmn2r30"))
# pdf("Figure2_panelB.pdf", useDingbats = F, width = 5, height = 3.5)
# use slightly customized dotplot function
DotPlot2(purk_s, genes_purkinje,
scale.by = 'radius',dot.min = 0.2, dot.scale = 6,
scale.min = 0.1,plot.legend = T, x.lab.rot = 45)
# dev.off()
# split into aldoc+ and aldoc-
purk_s@meta.data[['Aldoc_stat']] = sapply(as.character(purk_s@ident), function(x) {
split = strsplit(x, '_')[[1]]
if (split[2] == 'Anti') {
return('Aldoc_neg')
} else {
return('Aldoc_pos')
}
})
purk_s@meta.data$Aldoc_stat = factor(purk_s@meta.data$Aldoc_stat)
# plot spatial distributions
# pdf('Figure2_PanelC_left.pdf', useDingbats = F)
spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'Aldoc_stat', sps = SPs,
cluster = 'Aldoc_pos', do.print = F, color_divergent = T)
# dev.off()
# pdf('Figure2_PanelC_right.pdf', useDingbats = F)
spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'Aldoc_stat', sps = SPs,
cluster = 'Aldoc_neg', do.print = F, color_divergent = T)
# dev.off()
# plot spatial distributions of subclusters
# pdf('Figure2_panelC.pdf', useDingbats = F)
# spatial distributions for all purkinje clusters
for (clust in levels(purk_s@ident)) {
print(spatial_enrichment_map_meta(purk_s@meta.data, meta_col = 'subcluster', sps = SPs,
cluster = clust, do.print = F, color_divergent = T))
}
# dev.off()
# clean up
rm(purk_s)
gc()
gran_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Granule',
subset.raw = T)
gran_s = NormalizeData(gran_s)
genes_gran = c("Chrm3","Nr4a2","Galntl6", 'Ebf1', "Gprin3", "Rasgrf1")
# pdf("Figure2_panelE.pdf",useDingbats = F, width = 5, height = 3.5)
DotPlot2(gran_s, genes_gran, dot.scale = 10,
scale.min = 0.1, plot.legend = T,
x.lab.rot = 45)
# dev.off()
# plot spatial distributions of subclusters
# pdf('Figure2_panelF.pdf', useDingbats = F)
# spatial distributions for all granule clusters
for (clust_i in 1:3) {
clust = levels(gran_s@ident)[clust_i]
print(spatial_enrichment_map_meta(gran_s@meta.data, meta_col = 'subcluster', sps = SPs,
cluster = clust, do.print = F, color_divergent = T))
}
# dev.off()
# clean up
rm(gran_s)
gc()
bergmann_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = 'Bergmann',
subset.raw = T)
bergmann_s = NormalizeData(bergmann_s)
# pdf('Figure2_panelG', useDingbats = F, height = 3, width = 4)
DotPlot2(bergmann_s, c('Wif1', 'Mybpc1'), dot.scale = 8,
scale.min = 0.1, plot.legend = T, scale.by = 'size',
x.lab.rot = 45)
# dev.off()
# plot spatial distributions of subclusters
# pdf('Figure2_panelH.pdf', useDingbats = F)
# spatial distributions for Bergmann cluster
spatial_enrichment_map_meta(bergmann_s@meta.data, cluster = 'Bergmann_2', meta_col = 'subcluster', sps = SPs,
do.print = F, color_divergent = T)
# dev.off()
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] sp_1.3-1 tidyr_0.8.3 dplyr_0.8.3 Seurat_2.3.4 Matrix_1.2-16
## [6] cowplot_0.9.4 ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 class_7.3-15
## [4] modeltools_0.2-22 ggridges_0.5.1 mclust_5.4.3
## [7] htmlTable_1.13.1 base64enc_0.1-3 rstudioapi_0.10
## [10] proxy_0.4-23 npsurv_0.4-0 flexmix_2.3-15
## [13] bit64_0.9-7 mvtnorm_1.0-10 codetools_0.2-16
## [16] splines_3.5.3 R.methodsS3_1.7.1 lsei_1.2-0
## [19] robustbase_0.93-4 knitr_1.22 Formula_1.2-3
## [22] jsonlite_1.6 ica_1.0-2 cluster_2.0.7-1
## [25] kernlab_0.9-27 png_0.1-7 R.oo_1.22.0
## [28] compiler_3.5.3 httr_1.4.0 backports_1.1.4
## [31] assertthat_0.2.1 lazyeval_0.2.2 lars_1.2
## [34] acepack_1.4.1 htmltools_0.3.6 tools_3.5.3
## [37] igraph_1.2.4 gtable_0.3.0 glue_1.3.1
## [40] RANN_2.6.1 reshape2_1.4.3 Rcpp_1.0.3
## [43] trimcluster_0.1-2.1 gdata_2.18.0 ape_5.3
## [46] nlme_3.1-137 iterators_1.0.10 fpc_2.1-11.1
## [49] gbRd_0.4-11 lmtest_0.9-36 xfun_0.6
## [52] stringr_1.4.0 irlba_2.3.3 gtools_3.8.1
## [55] DEoptimR_1.0-8 MASS_7.3-51.1 zoo_1.8-5
## [58] scales_1.0.0 doSNOW_1.0.16 parallel_3.5.3
## [61] RColorBrewer_1.1-2 yaml_2.2.0 reticulate_1.11.1
## [64] pbapply_1.4-0 gridExtra_2.3 rpart_4.1-13
## [67] segmented_0.5-3.0 latticeExtra_0.6-28 stringi_1.4.3
## [70] foreach_1.4.4 checkmate_1.9.1 caTools_1.17.1.2
## [73] bibtex_0.4.2 Rdpack_0.10-1 SDMTools_1.1-221
## [76] rlang_0.4.1 pkgconfig_2.0.3 dtw_1.20-1
## [79] prabclus_2.2-7 bitops_1.0-6 evaluate_0.13
## [82] lattice_0.20-38 ROCR_1.0-7 purrr_0.3.3
## [85] labeling_0.3 htmlwidgets_1.3 bit_1.1-14
## [88] tidyselect_0.2.5 plyr_1.8.4 magrittr_1.5
## [91] R6_2.4.1 snow_0.4-3 gplots_3.0.1.1
## [94] Hmisc_4.2-0 pillar_1.4.2 foreign_0.8-71
## [97] withr_2.1.2 fitdistrplus_1.0-14 mixtools_1.1.0
## [100] survival_2.43-3 nnet_7.3-12 tsne_0.1-3
## [103] tibble_2.1.3 crayon_1.3.4 hdf5r_1.1.1
## [106] KernSmooth_2.23-15 rmarkdown_1.13 grid_3.5.3
## [109] data.table_1.12.2 metap_1.1 digest_0.6.19
## [112] diptest_0.75-7 R.utils_2.8.0 stats4_3.5.3
## [115] munsell_0.5.0