In this document, we reproduce panels from Figure 1 of our cerebellar cortex transcriptomic atlas manuscript.
library(Seurat)
# v2.3.4
library(Matrix)
library(Matrix.utils)
library(dendsort)
# Define an extra utility function
Matrix.column_norm <- function(A){
if (class(A)[1] == "dgTMatrix") {
temp = summary(A)
col.names = colnames(A)
row.names = rownames(A)
A = sparseMatrix(i=temp[,1],j=temp[,2],x=temp[,3])
rownames(A) = row.names
colnames(A) = col.names
}
A@x <- A@x / rep.int(Matrix::colSums(A), diff(A@p))
return(A)
}
We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data
directory of this repo.
# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')
row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb
# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)
If using raw count data, we will have to preprocess and compute the UMAP representation as shown here.
# If using raw data, preprocess to generate UMAP representation
# If using SCP object, this cell can be skipped (obj already has UMAP representation stored)
full_cb = NormalizeData(full_cb)
# basic dimensionality reduction for coarse cell type visualization
full_cb = FindVariableGenes(full_cb, do.plot = F) # top 2000 highly variable genes
full_cb = ScaleData(full_cb, genes.use = full_cb@var.genes)
full_cb = RunPCA(full_cb, pcs.compute = 50, do.print = F)
full_cb = RunUMAP(full_cb, dims.use = 1:25)
Alternatively, we can download a Seurat object with UMAP representation stored directly from the Single Cell Portal study. Download the cb_annotated_object.RDS
object from the Download tab of the study and copy into the data
directory. (Note that you will have to sign into SCP in order to access the Download tab.)
# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')
We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex.
# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata
Reproducing UMAP representation:
# UMAP by cluster
# png("Figure1_panelB.png", units="in", width=6, height=5, res=400)
DimPlot(full_cb, reduction.use = 'umap', do.label = T, group.by = "cluster",
no.legend = T, no.axes = T, pt.size = 0.5)
# dev.off()
Reproducing dendrogram and marker gene dotplot.
# read in metadata corresponding to object with granule cells downsampled to 60k
cluster_metadata_ds = read.csv('../data/cluster_metadata_ds.csv', header = T, row.names = 1)
# read in union of variable genes determined in cluster subanalyses (used for dendrogram)
var_gene_union = readLines('../data/var_genes_union.txt')
ds_gran_cb = SubsetData(full_cb, cells.use = row.names(cluster_metadata_ds), subset.raw = T)
ds_gran_cb = SetIdent(ds_gran_cb, ident.use = ds_gran_cb@meta.data$subcluster)
# generate dendrogram
dge.transpose = Matrix::t(ds_gran_cb@raw.data)
metacells = aggregate.Matrix(dge.transpose, groupings = ds_gran_cb@ident, fun="sum")
metacells.norm = Matrix::t(Matrix.column_norm(Matrix::t(metacells)))
metacells.scaled = scale(as.matrix(metacells.norm))
colnames(metacells.scaled) = row.names(ds_gran_cb@raw.data)
# subset to just relevant genes
m = metacells.scaled[, var_gene_union]
# compute gene based distance and hierarchical clusters
# (Weighted Pair Group Method with arithmetic mean)
dist.mat = dist(m)
hier = dendsort(hclust(dist.mat, method = 'mcquitty'))
# Figure 1c dendrogram only
# Note that final dendrogram was rotated in paper
# pdf("Figure1_panelC_dendrogram.pdf",useDingbats = F, width = 5, height = 4)
plot(hier, labels=F, hang=-1)
# dev.off()
# Genes for dotplot
genes_plot = rev(c('Ppp1r17', 'Gabra6', 'Eomes', 'Lypd6', 'Prkcd', 'Klhl1',
'Lgi2', 'Gdf10', 'Aqp4',
'Mobp', 'Ppfibp1',
'Dcn', 'Kcnj8', 'Ttr', 'Mrc1', 'C1qa',
'Flt1', 'Foxj1'))
# Order subclusters as in dendrogram
# There may be slight discrepancies in subcluster ordering depending on version of dendsort
ordered_ident = factor(ds_gran_cb@meta.data$subcluster, levels = row.names(m)[hier$order])
ds_gran_cb@ident = ordered_ident
ds_gran_cb = NormalizeData(ds_gran_cb)
dotplot = Seurat::DotPlot(ds_gran_cb, genes.plot = genes_plot, dot.min = 0.08, scale.by = 'radius',
x.lab.rot = T, dot.scale = 6, plot.legend = T, do.return = T)
dotplot = dotplot + theme(axis.text.x=element_text(angle=45,hjust = 0.95, vjust=0.95))
# Figure 1C dotplot
# pdf('Figure1_panelC_dotplot.pdf', useDingbats = F, width = 9, height = 8)
dotplot
# dev.off()
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dendsort_0.3.3 Matrix.utils_0.9.7 Seurat_2.3.4
## [4] Matrix_1.2-16 cowplot_0.9.4 ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 grr_0.9.5
## [4] class_7.3-15 modeltools_0.2-22 ggridges_0.5.1
## [7] mclust_5.4.3 htmlTable_1.13.1 base64enc_0.1-3
## [10] rstudioapi_0.10 proxy_0.4-23 npsurv_0.4-0
## [13] flexmix_2.3-15 bit64_0.9-7 mvtnorm_1.0-10
## [16] codetools_0.2-16 splines_3.5.3 R.methodsS3_1.7.1
## [19] lsei_1.2-0 robustbase_0.93-4 knitr_1.22
## [22] Formula_1.2-3 jsonlite_1.6 ica_1.0-2
## [25] cluster_2.0.7-1 kernlab_0.9-27 png_0.1-7
## [28] R.oo_1.22.0 compiler_3.5.3 httr_1.4.0
## [31] backports_1.1.4 assertthat_0.2.1 lazyeval_0.2.2
## [34] lars_1.2 acepack_1.4.1 htmltools_0.3.6
## [37] tools_3.5.3 igraph_1.2.4 gtable_0.3.0
## [40] glue_1.3.1 RANN_2.6.1 reshape2_1.4.3
## [43] dplyr_0.8.3 Rcpp_1.0.3 trimcluster_0.1-2.1
## [46] gdata_2.18.0 ape_5.3 nlme_3.1-137
## [49] iterators_1.0.10 fpc_2.1-11.1 gbRd_0.4-11
## [52] lmtest_0.9-36 xfun_0.6 stringr_1.4.0
## [55] irlba_2.3.3 gtools_3.8.1 DEoptimR_1.0-8
## [58] MASS_7.3-51.1 zoo_1.8-5 scales_1.0.0
## [61] doSNOW_1.0.16 parallel_3.5.3 RColorBrewer_1.1-2
## [64] yaml_2.2.0 reticulate_1.11.1 pbapply_1.4-0
## [67] gridExtra_2.3 rpart_4.1-13 segmented_0.5-3.0
## [70] latticeExtra_0.6-28 stringi_1.4.3 foreach_1.4.4
## [73] checkmate_1.9.1 caTools_1.17.1.2 bibtex_0.4.2
## [76] Rdpack_0.10-1 SDMTools_1.1-221 rlang_0.4.1
## [79] pkgconfig_2.0.3 dtw_1.20-1 prabclus_2.2-7
## [82] bitops_1.0-6 evaluate_0.13 lattice_0.20-38
## [85] ROCR_1.0-7 purrr_0.3.3 labeling_0.3
## [88] htmlwidgets_1.3 bit_1.1-14 tidyselect_0.2.5
## [91] plyr_1.8.4 magrittr_1.5 R6_2.4.1
## [94] snow_0.4-3 gplots_3.0.1.1 Hmisc_4.2-0
## [97] pillar_1.4.2 foreign_0.8-71 withr_2.1.2
## [100] fitdistrplus_1.0-14 mixtools_1.1.0 survival_2.43-3
## [103] nnet_7.3-12 tsne_0.1-3 tibble_2.1.3
## [106] crayon_1.3.4 hdf5r_1.1.1 KernSmooth_2.23-15
## [109] rmarkdown_1.13 grid_3.5.3 data.table_1.12.2
## [112] metap_1.1 digest_0.6.19 diptest_0.75-7
## [115] tidyr_0.8.3 R.utils_2.8.0 stats4_3.5.3
## [118] munsell_0.5.0