Figure 4: Characterization of MLI2 cells

In this document, we reproduce panels from Figure 4 of our cerebellar cortex transcriptomic atlas manuscript.

Load utility functions and data

library(Seurat)
# v2.3.4
library(liger)
library(ggplot2)
library(cowplot)

# Utility functions
source('../src/fig2_utils.R')
source('../src/fig4_utils.R')

We first load annotated adult mouse cerebellar cortex data. We can retrieve this in raw count format from GEO (cb_adult_mouse tar). Download and expand this directory and copy the files into the data directory of this repo.

# Read in full dataset (as mtx or RDS file)
# Raw counts can be downloaded from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE165371
# (cb_adult_mouse)
# Place in data directory
counts_cb = readMM('../data/cb_adult_mouse.mtx.gz')
barcodes_cb = readLines('../data/cb_adult_mouse_barcodes.txt')
genes_cb = readLines('../data/cb_adult_mouse_genes.txt')

row.names(counts_cb) = genes_cb
colnames(counts_cb) = barcodes_cb

# Create Seurat object if reading in raw data
full_cb = CreateSeuratObject(raw.data = counts_cb)

Alternatively, we can download a Seurat object with UMAP representation stored directly from the Single Cell Portal study. Download the cb_annotated_object.RDS object from the Download tab of the study and copy into the data directory. (Note that you will have to sign into SCP in order to access the Download tab.)

# Seurat object can be downloaded from SCP
# place in data directory
full_cb = readRDS('../data/cb_annotated_object.RDS')

We add in the cluster and subcluster annotations. This also includes additional metadata like sample sex. (Note that the SCP object also contains these annotations.)

# read in cluster metadata (available in data directory)
cluster_metadata = read.csv('../data/cluster_metadata.csv', header = T, row.names = 1)
full_cb@meta.data = cluster_metadata

Panel A

ints_s = SubsetData(full_cb, subset.name = 'cluster', accept.value = c('MLI1', 'MLI2', 'PLI'),
                    subset.raw = T)
ints_s = NormalizeData(ints_s)

# panel a
genes_ints = c("Htr2a","Slc6a5", "Tns1","Slc16a12","Acvr1c", "Npas3",
               "Grm8",'Cdh22', 'Nxph1', 'Prkcd', 'Sorcs3',  'Ptprk')


# pdf("Figure4_panelA.pdf",useDingbats = F,height = 3.5, width = 6)
DotPlot2(ints_s, genes.plot = genes_ints,
         dot.scale = 10, x.lab.rot = 45, plot.legend = T)

# dev.off()

Panel C

# read in HCR data for SORCS3, NXPH1, GRM8
grm_data = read.csv('../data/sorcs_nxph_grm_data.csv', stringsAsFactors = F)
grm_melted = reshape2::melt(grm_data, variable.name = 'slide', value.name = 'count')

## Using mli_ident, position, grm_pos as id variables

summed2 =  grm_melted %>% group_by(slide, position, mli_ident) %>% summarise(sum(count)) %>% ungroup()
colnames(summed2) = c('slide', 'position', 'mli_ident', 'sum')

summed2$position = factor(summed2$position, levels = c('distal', 'middle', 'inner'))
merged2 = summed2 %>% group_by(position, slide) %>% summarise(sum(sum)) %>%
  rename('total' = 'sum(sum)') %>% right_join(summed2)

## Joining, by = c("position", "slide")

merged2[['percent_pos']] = merged2$sum / merged2$total

merged2$mli_ident = factor(merged2$mli_ident, levels = rev(c('sorcs', 'nxph', 'nxph_sorcs')))

# pdf('Figure4_panelC.pdf', useDingbats = F, width = 6, height = 4)
# set seed for jittering reproducibility 
set.seed(1)
merged2 %>% group_by(position, mli_ident) %>% summarise(mean(percent_pos), sd(percent_pos)) %>%
  ggplot(aes(x = position, y = `mean(percent_pos)`, fill = mli_ident)) +
  geom_bar(stat = 'identity',
           position=position_dodge(), width = 0.98) + 
  scale_fill_manual(values=c('#F8766D', '#3EC14D', '#C13EB2')) + 
  geom_errorbar(aes(x=position, ymin=`mean(percent_pos)` + `sd(percent_pos)`, 
                    ymax= `mean(percent_pos)` - `sd(percent_pos)`),
                position=position_dodge(0.95), width = 0.3, size = 0.3) + 
  geom_point(data = merged2, aes(x = position, y = percent_pos,
                                 group = interaction(factor(position),factor(mli_ident))),
             position = position_jitterdodge(dodge.width=0.95), alpha = 0.6, shape = 1) + 
  scale_shape(solid = FALSE) + scale_y_continuous(limits = c(0, 0.84), expand = c(0,0)) + 
  ylab('% cells') + xlab('ML position') + 
  theme(axis.text.y = element_text(angle = 90)) + 
  coord_flip()

# dev.off()

Panel D-E

See cb_figure4_DE.html.

Panel I

# Convert to liger object
ints_split = SplitObject(ints_s, attribute.1 = 'sex', do.clean = T)

ints_l = createLiger(raw.data = list(male = ints_split$male@raw.data,
                                     female = ints_split$female@raw.data),
                     remove.missing = F)
ints_l = liger::normalize(ints_l)
ints_l@clusters = droplevels(ints_s@meta.data[row.names(ints_l@cell.data), 'cluster'])

# pass in tSNE coordinates
tsne_ints = read.csv('../data/ints_tsne.csv', row.names = 1, col.names = c('tSNE_1', 'tSNE_2'))

## Warning in read.table(file = file, header = header, sep = sep, quote =
## quote, : header and 'col.names' are of different lengths

ints_l@tsne.coords = as.matrix(tsne_ints[row.names(ints_l@cell.data),])

ints_s@dr$tsne = new(Class = "dim.reduction", cell.embeddings = as.matrix(tsne_ints), 
                     key = "tSNE_")

clusts = DimPlot(ints_s, reduction.use = 'tsne', group.by = 'cluster', no.legend = T, no.axes = T,
                 do.label = T, cols.use = c('#C13EB2', '#3EC14D', 'blue'), pt.size = 0.001, do.return = T)

gjd2 = plotGene_ordered(ints_l, 'Gjd2', plot.by = 'none', points.only = T, do.legend = F, 
                     max.clip = 5, clip.absolute = T, pt.size = 0.001, return.plots = T)

# png("Figure4_panelI.png", units="in", width=6, height=8, res=300)
plot_grid(clusts, gjd2, nrow = 2)

# dev.off()

Panel K

# read in HCR data for SORCS3, NXPH1, GJD2
gjd_data = read.csv('../data/sorcs_nxph_gjd_data.csv', stringsAsFactors = F)
gjd_melted = reshape2::melt(gjd_data, variable.name = 'slide', value.name = 'count')

## Using mli_ident, gjd_pos, position as id variables

# summarise on two levels
summed =  gjd_melted %>% group_by(slide, mli_ident) %>% summarise(sum(count)) %>% ungroup()
summed_gjd = gjd_melted %>% group_by(slide, mli_ident, gjd_pos) %>% summarise(sum(count)) %>% 
  rename('sum_gjd' = 'sum(count)') %>% ungroup()

merged = inner_join(summed, summed_gjd[summed_gjd$gjd_pos == 'TRUE',])

## Joining, by = c("slide", "mli_ident")

merged[['percent']] = merged$sum_gjd / merged[['sum(count)']]
colnames(merged) =  c('slide', 'mli_ident', 'total', 'gjd_pos', 'sum_gjd', 'percent')

# pdf('Figure4_panelK.pdf', useDingbats = F, width = 5, height = 2.5)
set.seed(1)
merged %>% group_by(mli_ident) %>% summarise(mean(percent), sd(percent)) %>%
  ggplot(aes(x = mli_ident, y = `mean(percent)`)) +
  geom_bar(stat = 'identity', fill = '#C13EB2',
           position=position_dodge(width = 0.98)) + 
  scale_x_discrete(breaks=c("nxph","sorcs"),
                   labels=c("MLI2", "MLI1")) +
  geom_errorbar(aes(x=mli_ident, ymin=`mean(percent)` + `sd(percent)`, ymax= `mean(percent)` - `sd(percent)`),
                position=position_dodge(0.95), width = 0.3, size = 0.3) + 
  xlab('') + ylab('% cells expressing Gjd2') + 
  coord_flip() +
  geom_point(data = merged, aes(x = mli_ident, y = percent),
             position = position_jitter(width = 0.3), alpha = 0.6, shape = 1, size = 2) + 
  scale_shape(solid = FALSE) + scale_y_continuous(limits = c(0, 1.03), expand = c(0,0)) + 
  theme(axis.text.y = element_text(angle = 90))

# dev.off()

sessionInfo()

## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] reshape2_1.4.3  sp_1.3-1        tidyr_0.8.3     dplyr_0.8.3    
##  [5] liger_0.4.2     patchwork_0.0.1 Seurat_2.3.4    Matrix_1.2-16  
##  [9] cowplot_0.9.4   ggplot2_3.1.1  
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15          colorspace_1.4-1    class_7.3-15       
##   [4] modeltools_0.2-22   ggridges_0.5.1      mclust_5.4.3       
##   [7] htmlTable_1.13.1    base64enc_0.1-3     rstudioapi_0.10    
##  [10] proxy_0.4-23        npsurv_0.4-0        riverplot_0.6      
##  [13] ggrepel_0.8.0       flexmix_2.3-15      bit64_0.9-7        
##  [16] mvtnorm_1.0-10      codetools_0.2-16    splines_3.5.3      
##  [19] R.methodsS3_1.7.1   lsei_1.2-0          robustbase_0.93-4  
##  [22] knitr_1.22          Formula_1.2-3       jsonlite_1.6       
##  [25] ica_1.0-2           cluster_2.0.7-1     kernlab_0.9-27     
##  [28] png_0.1-7           R.oo_1.22.0         compiler_3.5.3     
##  [31] httr_1.4.0          backports_1.1.4     assertthat_0.2.1   
##  [34] lazyeval_0.2.2      lars_1.2            acepack_1.4.1      
##  [37] htmltools_0.3.6     tools_3.5.3         igraph_1.2.4       
##  [40] gtable_0.3.0        glue_1.3.1          RANN_2.6.1         
##  [43] Rcpp_1.0.3          trimcluster_0.1-2.1 gdata_2.18.0       
##  [46] ape_5.3             nlme_3.1-137        iterators_1.0.10   
##  [49] fpc_2.1-11.1        gbRd_0.4-11         lmtest_0.9-36      
##  [52] xfun_0.6            stringr_1.4.0       irlba_2.3.3        
##  [55] gtools_3.8.1        DEoptimR_1.0-8      MASS_7.3-51.1      
##  [58] zoo_1.8-5           scales_1.0.0        doSNOW_1.0.16      
##  [61] parallel_3.5.3      RColorBrewer_1.1-2  yaml_2.2.0         
##  [64] reticulate_1.11.1   pbapply_1.4-0       gridExtra_2.3      
##  [67] rpart_4.1-13        segmented_0.5-3.0   latticeExtra_0.6-28
##  [70] stringi_1.4.3       foreach_1.4.4       checkmate_1.9.1    
##  [73] caTools_1.17.1.2    bibtex_0.4.2        Rdpack_0.10-1      
##  [76] SDMTools_1.1-221    rlang_0.4.1         pkgconfig_2.0.3    
##  [79] dtw_1.20-1          prabclus_2.2-7      bitops_1.0-6       
##  [82] RANN.L1_2.5.2       evaluate_0.13       lattice_0.20-38    
##  [85] ROCR_1.0-7          purrr_0.3.3         labeling_0.3       
##  [88] htmlwidgets_1.3     bit_1.1-14          tidyselect_0.2.5   
##  [91] plyr_1.8.4          magrittr_1.5        R6_2.4.1           
##  [94] snow_0.4-3          gplots_3.0.1.1      Hmisc_4.2-0        
##  [97] pillar_1.4.2        foreign_0.8-71      withr_2.1.2        
## [100] fitdistrplus_1.0-14 mixtools_1.1.0      survival_2.43-3    
## [103] nnet_7.3-12         tsne_0.1-3          tibble_2.1.3       
## [106] crayon_1.3.4        hdf5r_1.1.1         KernSmooth_2.23-15 
## [109] rmarkdown_1.13      grid_3.5.3          data.table_1.12.2  
## [112] FNN_1.1.3           metap_1.1           digest_0.6.19      
## [115] diptest_0.75-7      R.utils_2.8.0       stats4_3.5.3       
## [118] munsell_0.5.0       viridisLite_0.3.0