In this document, we reproduce panels from Figure 4 of our cerebellar cortex transcriptomic atlas manuscript.
library(Seurat)
# v2.3.4
library(liger)
library(ggplot2)
library(cowplot)
# Utility functions
source('../src/fig2_utils.R')
source('../src/fig4_utils.R')
We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data
directory of this repo.
# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')
row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb
# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)
Alternatively, we can download a Seurat object with UMAP representation stored directly from the Single Cell Portal study. Download the cb_annotated_object.RDS
object from the Download tab of the study and copy into the data
directory. (Note that you will have to sign into SCP in order to access the Download tab.)
# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')
We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex. (Note that the SCP object also contains these annotations.)
# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata
ints_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('MLI1', 'MLI2', 'PLI'),
subset.raw = T)
ints_s = NormalizeData(ints_s)
# panel a
genes_ints = c("Htr2a","Slc6a5", "Tns1","Slc16a12","Acvr1c", "Npas3",
"Grm8",'Cdh22', 'Nxph1', 'Prkcd', 'Sorcs3', 'Ptprk')
# pdf("Figure4_panelA.pdf",useDingbats = F,height = 3.5, width = 6)
DotPlot2(ints_s, genes.plot = genes_ints,
dot.scale = 10, x.lab.rot = 45, plot.legend = T)
# dev.off()
# read in HCR data for SORCS3, NXPH1, GRM8
grm_data = read.csv('../data/sorcs_nxph_grm_data.csv', stringsAsFactors = F)
grm_melted = reshape2::melt(grm_data, variable.name = 'slide', value.name = 'count')
## Using mli_ident, position, grm_pos as id variables
summed2 = grm_melted %>% group_by(slide, position, mli_ident) %>% summarise(sum(count)) %>% ungroup()
colnames(summed2) = c('slide', 'position', 'mli_ident', 'sum')
summed2$position = factor(summed2$position, levels = c('distal', 'middle', 'inner'))
merged2 = summed2 %>% group_by(position, slide) %>% summarise(sum(sum)) %>%
rename('total' = 'sum(sum)') %>% right_join(summed2)
## Joining, by = c("position", "slide")
merged2[['percent_pos']] = merged2$sum / merged2$total
merged2$mli_ident = factor(merged2$mli_ident, levels = rev(c('sorcs', 'nxph', 'nxph_sorcs')))
# pdf('Figure4_panelC.pdf', useDingbats = F, width = 6, height = 4)
# set seed for jittering reproducibility
set.seed(1)
merged2 %>% group_by(position, mli_ident) %>% summarise(mean(percent_pos), sd(percent_pos)) %>%
ggplot(aes(x = position, y = `mean(percent_pos)`, fill = mli_ident)) +
geom_bar(stat = 'identity',
position=position_dodge(), width = 0.98) +
scale_fill_manual(values=c('#F8766D', '#3EC14D', '#C13EB2')) +
geom_errorbar(aes(x=position, ymin=`mean(percent_pos)` + `sd(percent_pos)`,
ymax= `mean(percent_pos)` - `sd(percent_pos)`),
position=position_dodge(0.95), width = 0.3, size = 0.3) +
geom_point(data = merged2, aes(x = position, y = percent_pos,
group = interaction(factor(position),factor(mli_ident))),
position = position_jitterdodge(dodge.width=0.95), alpha = 0.6, shape = 1) +
scale_shape(solid = FALSE) + scale_y_continuous(limits = c(0, 0.84), expand = c(0,0)) +
ylab('% cells') + xlab('ML position') +
theme(axis.text.y = element_text(angle = 90)) +
coord_flip()
# dev.off()
See cb_figure4_DE.html.
# Convert to liger object
ints_split = SplitObject(ints_s, attribute.1 = 'sex', do.clean = T)
ints_l = createLiger(raw.data = list(male = ints_split$male@raw.data,
female = ints_split$female@raw.data),
remove.missing = F)
ints_l = liger::normalize(ints_l)
ints_l@clusters = droplevels(ints_s@meta.data[row.names(ints_l@cell.data), 'cluster'])
# pass in tSNE coordinates
tsne_ints = read.csv('../data/ints_tsne.csv', row.names = 1, col.names = c('tSNE_1', 'tSNE_2'))
## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : header and 'col.names' are of different lengths
ints_l@tsne.coords = as.matrix(tsne_ints[row.names(ints_l@cell.data),])
ints_s@dr$tsne = new(Class = "dim.reduction", cell.embeddings = as.matrix(tsne_ints),
key = "tSNE_")
clusts = DimPlot(ints_s, reduction.use = 'tsne', group.by = 'cluster', no.legend = T, no.axes = T,
do.label = T, cols.use = c('#C13EB2', '#3EC14D', 'blue'), pt.size = 0.001, do.return = T)
gjd2 = plotGene_ordered(ints_l, 'Gjd2', plot.by = 'none', points.only = T, do.legend = F,
max.clip = 5, clip.absolute = T, pt.size = 0.001, return.plots = T)
# png("Figure4_panelI.png", units="in", width=6, height=8, res=300)
plot_grid(clusts, gjd2, nrow = 2)
# dev.off()
# read in HCR data for SORCS3, NXPH1, GJD2
gjd_data = read.csv('../data/sorcs_nxph_gjd_data.csv', stringsAsFactors = F)
gjd_melted = reshape2::melt(gjd_data, variable.name = 'slide', value.name = 'count')
## Using mli_ident, gjd_pos, position as id variables
# summarise on two levels
summed = gjd_melted %>% group_by(slide, mli_ident) %>% summarise(sum(count)) %>% ungroup()
summed_gjd = gjd_melted %>% group_by(slide, mli_ident, gjd_pos) %>% summarise(sum(count)) %>%
rename('sum_gjd' = 'sum(count)') %>% ungroup()
merged = inner_join(summed, summed_gjd[summed_gjd$gjd_pos == 'TRUE',])
## Joining, by = c("slide", "mli_ident")
merged[['percent']] = merged$sum_gjd / merged[['sum(count)']]
colnames(merged) = c('slide', 'mli_ident', 'total', 'gjd_pos', 'sum_gjd', 'percent')
# pdf('Figure4_panelK.pdf', useDingbats = F, width = 5, height = 2.5)
set.seed(1)
merged %>% group_by(mli_ident) %>% summarise(mean(percent), sd(percent)) %>%
ggplot(aes(x = mli_ident, y = `mean(percent)`)) +
geom_bar(stat = 'identity', fill = '#C13EB2',
position=position_dodge(width = 0.98)) +
scale_x_discrete(breaks=c("nxph","sorcs"),
labels=c("MLI2", "MLI1")) +
geom_errorbar(aes(x=mli_ident, ymin=`mean(percent)` + `sd(percent)`, ymax= `mean(percent)` - `sd(percent)`),
position=position_dodge(0.95), width = 0.3, size = 0.3) +
xlab('') + ylab('% cells expressing Gjd2') +
coord_flip() +
geom_point(data = merged, aes(x = mli_ident, y = percent),
position = position_jitter(width = 0.3), alpha = 0.6, shape = 1, size = 2) +
scale_shape(solid = FALSE) + scale_y_continuous(limits = c(0, 1.03), expand = c(0,0)) +
theme(axis.text.y = element_text(angle = 90))
# dev.off()
sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] reshape2_1.4.3 sp_1.3-1 tidyr_0.8.3 dplyr_0.8.3
## [5] liger_0.4.2 patchwork_0.0.1 Seurat_2.3.4 Matrix_1.2-16
## [9] cowplot_0.9.4 ggplot2_3.1.1
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 class_7.3-15
## [4] modeltools_0.2-22 ggridges_0.5.1 mclust_5.4.3
## [7] htmlTable_1.13.1 base64enc_0.1-3 rstudioapi_0.10
## [10] proxy_0.4-23 npsurv_0.4-0 riverplot_0.6
## [13] ggrepel_0.8.0 flexmix_2.3-15 bit64_0.9-7
## [16] mvtnorm_1.0-10 codetools_0.2-16 splines_3.5.3
## [19] R.methodsS3_1.7.1 lsei_1.2-0 robustbase_0.93-4
## [22] knitr_1.22 Formula_1.2-3 jsonlite_1.6
## [25] ica_1.0-2 cluster_2.0.7-1 kernlab_0.9-27
## [28] png_0.1-7 R.oo_1.22.0 compiler_3.5.3
## [31] httr_1.4.0 backports_1.1.4 assertthat_0.2.1
## [34] lazyeval_0.2.2 lars_1.2 acepack_1.4.1
## [37] htmltools_0.3.6 tools_3.5.3 igraph_1.2.4
## [40] gtable_0.3.0 glue_1.3.1 RANN_2.6.1
## [43] Rcpp_1.0.3 trimcluster_0.1-2.1 gdata_2.18.0
## [46] ape_5.3 nlme_3.1-137 iterators_1.0.10
## [49] fpc_2.1-11.1 gbRd_0.4-11 lmtest_0.9-36
## [52] xfun_0.6 stringr_1.4.0 irlba_2.3.3
## [55] gtools_3.8.1 DEoptimR_1.0-8 MASS_7.3-51.1
## [58] zoo_1.8-5 scales_1.0.0 doSNOW_1.0.16
## [61] parallel_3.5.3 RColorBrewer_1.1-2 yaml_2.2.0
## [64] reticulate_1.11.1 pbapply_1.4-0 gridExtra_2.3
## [67] rpart_4.1-13 segmented_0.5-3.0 latticeExtra_0.6-28
## [70] stringi_1.4.3 foreach_1.4.4 checkmate_1.9.1
## [73] caTools_1.17.1.2 bibtex_0.4.2 Rdpack_0.10-1
## [76] SDMTools_1.1-221 rlang_0.4.1 pkgconfig_2.0.3
## [79] dtw_1.20-1 prabclus_2.2-7 bitops_1.0-6
## [82] RANN.L1_2.5.2 evaluate_0.13 lattice_0.20-38
## [85] ROCR_1.0-7 purrr_0.3.3 labeling_0.3
## [88] htmlwidgets_1.3 bit_1.1-14 tidyselect_0.2.5
## [91] plyr_1.8.4 magrittr_1.5 R6_2.4.1
## [94] snow_0.4-3 gplots_3.0.1.1 Hmisc_4.2-0
## [97] pillar_1.4.2 foreign_0.8-71 withr_2.1.2
## [100] fitdistrplus_1.0-14 mixtools_1.1.0 survival_2.43-3
## [103] nnet_7.3-12 tsne_0.1-3 tibble_2.1.3
## [106] crayon_1.3.4 hdf5r_1.1.1 KernSmooth_2.23-15
## [109] rmarkdown_1.13 grid_3.5.3 data.table_1.12.2
## [112] FNN_1.1.3 metap_1.1 digest_0.6.19
## [115] diptest_0.75-7 R.utils_2.8.0 stats4_3.5.3
## [118] munsell_0.5.0 viridisLite_0.3.0