Figure 4: Characterization of developing MLI2 cells

In this document, we reproduce panels from Figure 4 of our cerebellar cortex transcriptomic atlas manuscript.

Load utility functions and data

library(Seurat)
# v3.1.5
library(monocle3)
library(ggplot2)
library(cowplot)

# Additional plotting functions
source('../src/fig4_monocle.R')

# Define an extra utility function
Matrix.column_norm <- function(A){
  if (class(A)[1] == "dgTMatrix") {
    temp = summary(A)
    col.names = colnames(A)
    row.names = rownames(A)
    A = sparseMatrix(i=temp[,1],j=temp[,2],x=temp[,3])
    rownames(A) = row.names
    colnames(A) = col.names
  }
  A@x <- A@x / rep.int(Matrix::colSums(A), diff(A@p))
  return(A)
}

We can download a Seurat object with relevant metadata directly from the Single Cell Portal study. Download the cb_dev_annotated.RDS object from the Download tab of the study and copy into the data directory. (Note that you will have to sign into SCP in order to access the Download tab.)

cb_dev_s = readRDS('../data/cb_dev_annotated.RDS')

To reproduce some of the pseudotime and gene module analyses (also relevant for Extended Data Figure 6), we need to create a Monocle object and do some basic preprocessing. Note that the UMAP coordinates are taken directly from the original LIGER analysis of the unannotated developmental data (this unannotated data is available as raw count matrix in RDS format on SCP, as cb_dev_counts.RDS).

# Create monocle object
cb_dev_mon = new_cell_data_set(expression_data = cb_dev_s@assays$RNA@counts)

## Warning in new_cell_data_set(expression_data = cb_dev_s@assays$RNA@counts):
## Warning: gene_metadata must contain a column verbatim named 'gene_short_name'
## for certain functions.

# add batch information 
cb_dev_mon@colData[['sample']] = sapply(colnames(cb_dev_mon), function(x) {
  splits = strsplit(x, '_')[[1]]
  # if from the dev datasets
  if (splits[2] %in% c(1, 2)) {
    paste0(splits[1], '_', splits[2])
  } else {
    paste0('P60_', splits[1], '_', splits[2])
  }
})
# set gene row name 
rowData(cb_dev_mon)[['gene_short_name']] = row.names(rowData(cb_dev_mon))

cb_dev_mon@colData[['age']] = sapply(cb_dev_mon@colData$sample, function(x) {
  strsplit(x, '_')[[1]][1]
})
# basic preprocessing
cb_dev_mon <- preprocess_cds(cb_dev_mon, num_dim = 30, use_genes = cb_dev_s@assays$RNA@var.features)
# set UMAP
reducedDims(cb_dev_mon)$UMAP = cb_dev_s@reductions$umap@cell.embeddings

# generate partitions/cluster from UMAP
cb_dev_mon <- cluster_cells(cb_dev_mon, resolution = 5e-4, random_seed = 1)

cb_dev_mon <- learn_graph(cb_dev_mon)

# Compute pseudotime
# Select top node as root along with rightmost node of 
# non-MLI partition (if running in interactive)

# these nodes are 
root_pr_nodes = c('Y_62', 'Y_243')

cb_dev_mon = order_cells(cb_dev_mon, root_pr_nodes = root_pr_nodes)

Panel D

cb_dev_mon@colData$age = factor(cb_dev_mon@colData$age, 
                                levels = c('E18', 'P0', 'P4', 'P8', 'P12', 'P16'), ordered = T)

# pdf('Figure4_panelD.pdf', width = 3.2, height = 3, useDingbats = F)
age = plot_cells_colors(cb_dev_mon,
           color_cells_by = 'age',
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           label_roots = F,
           graph_label_size=3,
           trajectory_graph_segment_size = 0.5,
           trajectory_graph_color = 'grey60',
           cell_size = 0.5, direction = -1, viridis_option = 'viridis')
# dev.off()


gradient = plot_cells_colors(cb_dev_mon,
                         color_cells_by = 'pseudotime',
                         label_cell_groups=FALSE,
                         label_leaves=FALSE,
                         label_branch_points=FALSE,
                         label_roots = F,
                         graph_label_size=3,
                         trajectory_graph_segment_size = 0.5,
                         trajectory_graph_color = 'grey60',
                         cell_size = 0.5, use_gradient = T, high_col = 'orangered')

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

plot_grid(age, gradient)

Panel E

# compute normalization as in liger
norm_genes = Matrix.column_norm(cb_dev_s@assays$RNA@counts)

# note that each gene is individually scaled for better resolution of developmental temporal changes 
# remove legend.position = 'none' for legend comparison

# pdf('Figure4_panelE.pdf', width = 7.8, height = 3.2, useDingbats = F)
sorcs = plot_cells_colors(cb_dev_mon,
                            genes = c('Sorcs3'),
                            label_cell_groups=FALSE,
                            label_leaves=FALSE,
                            label_branch_points=FALSE,
                            label_roots = F,
                            graph_label_size=3,
                            trajectory_graph_segment_size = 0.5,
                            trajectory_graph_color = 'grey60',
                            cell_size = 0.5,
                            normalized_values = norm_genes) + theme(legend.position = 'none')
nxph = plot_cells_colors(cb_dev_mon,
                           genes = c('Nxph1'),
                           label_cell_groups=FALSE,
                           label_leaves=FALSE,
                           label_branch_points=FALSE,
                           label_roots = F,
                           graph_label_size=3,
                           trajectory_graph_segment_size = 0.5,
                           trajectory_graph_color = 'grey60',
                           cell_size = 0.5,
                           normalized_values = norm_genes) + theme(legend.position = 'none')
fos = plot_cells_colors(cb_dev_mon,
                          genes = c('Fos'),
                          label_cell_groups=FALSE,
                          label_leaves=FALSE,
                          label_branch_points=FALSE,
                          label_roots = F,
                          graph_label_size=3,
                          trajectory_graph_segment_size = 0.5,
                          trajectory_graph_color = 'grey60',
                          cell_size = 0.5,
                          normalized_values = norm_genes) + theme(legend.position = 'none')
plot_grid(sorcs, nxph, fos, nrow = 1)

# dev.off()

sessionInfo()

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 10 (buster)
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] dplyr_0.8.5                 cowplot_1.0.0              
##  [3] ggplot2_3.3.0               monocle3_0.2.2             
##  [5] SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1
##  [7] DelayedArray_0.12.3         BiocParallel_1.20.1        
##  [9] matrixStats_0.56.0          GenomicRanges_1.38.0       
## [11] GenomeInfoDb_1.22.1         IRanges_2.20.2             
## [13] S4Vectors_0.24.4            Biobase_2.46.0             
## [15] BiocGenerics_0.32.0         Seurat_3.1.5               
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-144             tsne_0.1-3               bitops_1.0-6            
##  [4] RcppAnnoy_0.0.16         RColorBrewer_1.1-2       httr_1.4.1              
##  [7] sctransform_0.2.1        tools_3.6.3              R6_2.4.1                
## [10] irlba_2.3.3              KernSmooth_2.23-16       uwot_0.1.8              
## [13] lazyeval_0.2.2           colorspace_1.4-1         withr_2.2.0             
## [16] tidyselect_1.1.0         gridExtra_2.3            compiler_3.6.3          
## [19] plotly_4.9.2.1           labeling_0.3             scales_1.1.1            
## [22] lmtest_0.9-37            proxy_0.4-24             ggridges_0.5.2          
## [25] pbapply_1.4-2            rappdirs_0.3.1           stringr_1.4.0           
## [28] digest_0.6.25            rmarkdown_2.9            XVector_0.26.0          
## [31] pkgconfig_2.0.3          htmltools_0.5.0          htmlwidgets_1.5.1       
## [34] rlang_0.4.6              DelayedMatrixStats_1.8.0 farver_2.0.3            
## [37] zoo_1.8-8                jsonlite_1.6.1           ica_1.0-2               
## [40] RCurl_1.98-1.2           magrittr_1.5             GenomeInfoDbData_1.2.2  
## [43] patchwork_1.0.0          Matrix_1.2-18            Rcpp_1.0.4.6            
## [46] munsell_0.5.0            viridis_0.5.1            ape_5.3                 
## [49] reticulate_1.16          lifecycle_0.2.0          stringi_1.4.6           
## [52] yaml_2.2.1               zlibbioc_1.32.0          MASS_7.3-51.5           
## [55] Rtsne_0.15               plyr_1.8.6               grid_3.6.3              
## [58] listenv_0.8.0            ggrepel_0.8.2            crayon_1.3.4            
## [61] lattice_0.20-40          splines_3.6.3            knitr_1.28              
## [64] pillar_1.4.4             igraph_1.2.5             future.apply_1.5.0      
## [67] reshape2_1.4.4           codetools_0.2-16         leiden_0.3.3            
## [70] glue_1.4.1               evaluate_0.14            leidenbase_0.1.1        
## [73] data.table_1.12.8        png_0.1-7                vctrs_0.3.0             
## [76] gtable_0.3.0             RANN_2.6.1               purrr_0.3.4             
## [79] tidyr_1.1.0              future_1.17.0            assertthat_0.2.1        
## [82] xfun_0.24                rsvd_1.0.3               survival_3.1-8          
## [85] viridisLite_0.3.0        tibble_3.0.1             cluster_2.1.0           
## [88] globals_0.12.5           fitdistrplus_1.1-1       ellipsis_0.3.1          
## [91] ROCR_1.0-11