library(spacexr)
library(Matrix)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(ggplot2)
datadir <- system.file("extdata",'SpatialRNA/VisiumVignette',package = 'spacexr') # directory for sample Visium dataset
if(!dir.exists(datadir))
  dir.create(datadir)
savedir <- 'RCTD_results'
if(!dir.exists(savedir))
  dir.create(savedir)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache = TRUE,
  out.width = "100%"
)

Introduction

Robust Cell Type Decomposition, or RCTD, is part of the spacexr R package for learning cell type-specific differential expression from spatial transcriptomics data. We will first use RCTD to assign cell types to a toy hippocampus Visium dataset. We will define cell type profiles using an annotated single cell RNA-sequencing (scRNA-seq) hippocampus dataset.

Data Preprocessing and running RCTD

First, we run RCTD on the data to annotate cell types. Please note that this follows exactly the content of the spatial transcriptomics RCTD vignette, except that we use RCTD on multi mode. Since multi mode can discover more than two (up to a prespecified amount) cell types per pixel, it is a reasonable choice for technologies such as Visium that can have spatial resolution much larger than single cells. Please refer to the spatial transcriptomics vignette for more explanation on the RCTD algorithm.

### Load in/preprocess your data, this might vary based on your file type
if(!file.exists(file.path(savedir,'myRCTD_visium_multi.rds'))) {
  counts <- as.data.frame(readr::read_csv(file.path(datadir,"counts.csv"))) # load in counts matrix
  coords <- read.csv(file.path(datadir,"coords.csv"))
  rownames(counts) <- counts[,1]; counts[,1] <- NULL # Move first column to rownames
  rownames(coords) <- coords[,1]; coords[,1] <- NULL # Move first column to rownames
  nUMI <- colSums(counts) # In this case, total counts per pixel is nUMI
  puck <- SpatialRNA(coords, counts, nUMI)
  barcodes <- colnames(puck@counts) # pixels to be used (a list of barcode names). 
  plot_puck_continuous(puck, barcodes, puck@nUMI, ylimit = c(0,round(quantile(puck@nUMI,0.9))), 
                       title ='plot of nUMI') 
  refdir <- system.file("extdata",'Reference/Visium_Ref',package = 'spacexr') # directory for the reference
  counts <- read.csv(file.path(refdir,"counts.csv")) # load in cell types
  rownames(counts) <- counts[,1]; counts[,1] <- NULL # Move first column to rownames
  cell_types <- read.csv(file.path(refdir,"cell_types.csv")) # load in cell types
  cell_types <- setNames(cell_types[[2]], cell_types[[1]])
  cell_types <- as.factor(cell_types) # convert to factor data type
  nUMI <- read.csv(file.path(refdir,"nUMI.csv")) # load in cell types
  nUMI <- setNames(nUMI[[2]], nUMI[[1]])
  reference <- Reference(counts, cell_types, nUMI)
  myRCTD <- create.RCTD(puck, reference, max_cores = 2)
  myRCTD <- run.RCTD(myRCTD, doublet_mode = 'multi')
  saveRDS(myRCTD,file.path(savedir,'myRCTD_visium_multi.rds'))
}
#> New names:
#> * `` -> ...1
#> Rows: 276 Columns: 314
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr   (1): ...1
#> dbl (313): AAAGGGATGTAGCAAG-1, AAAGGGCAGCTTGAAT-1, AACAACTGGTAGTTGC-1, AACCC...
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Warning in Reference(counts, cell_types, nUMI): Reference: nUMI does not match
#> colSums of counts. If this is unintended, please correct this discrepancy. If
#> this is intended, there is no problem.
#> Begin: process_cell_type_info
#> process_cell_type_info: number of cells in reference: 510
#> process_cell_type_info: number of genes in reference: 307
#> 3030303030303030303030303030303030
#> End: process_cell_type_info
#> create.RCTD: getting regression differentially expressed genes:
#> get_de_genes: Astrocyte found DE genes: 21
#> get_de_genes: CA1 found DE genes: 15
#> get_de_genes: CA3 found DE genes: 24
#> get_de_genes: Cajal_Retzius found DE genes: 15
#> get_de_genes: Choroid found DE genes: 12
#> get_de_genes: Denate found DE genes: 14
#> get_de_genes: Endothelial_Stalk found DE genes: 7
#> get_de_genes: Endothelial_Tip found DE genes: 6
#> get_de_genes: Entorihinal found DE genes: 16
#> get_de_genes: Ependymal found DE genes: 11
#> get_de_genes: Interneuron found DE genes: 7
#> get_de_genes: Microglia_Macrophages found DE genes: 6
#> get_de_genes: Mural found DE genes: 8
#> get_de_genes: Neurogenesis found DE genes: 18
#> get_de_genes: Neuron.Slc17a6 found DE genes: 13
#> get_de_genes: Oligodendrocyte found DE genes: 11
#> get_de_genes: Polydendrocyte found DE genes: 4
#> get_de_genes: total DE genes: 102
#> create.RCTD: getting platform effect normalization differentially expressed genes:
#> get_de_genes: Astrocyte found DE genes: 22
#> get_de_genes: CA1 found DE genes: 28
#> get_de_genes: CA3 found DE genes: 32
#> get_de_genes: Cajal_Retzius found DE genes: 36
#> get_de_genes: Choroid found DE genes: 22
#> get_de_genes: Denate found DE genes: 28
#> get_de_genes: Endothelial_Stalk found DE genes: 16
#> get_de_genes: Endothelial_Tip found DE genes: 10
#> get_de_genes: Entorihinal found DE genes: 29
#> get_de_genes: Ependymal found DE genes: 14
#> get_de_genes: Interneuron found DE genes: 13
#> get_de_genes: Microglia_Macrophages found DE genes: 9
#> get_de_genes: Mural found DE genes: 12
#> get_de_genes: Neurogenesis found DE genes: 32
#> get_de_genes: Neuron.Slc17a6 found DE genes: 26
#> get_de_genes: Oligodendrocyte found DE genes: 16
#> get_de_genes: Polydendrocyte found DE genes: 8
#> get_de_genes: total DE genes: 122
#> fitBulk: decomposing bulk
#> chooseSigma: using initial Q_mat with sigma =  1
#> Likelihood value: 123496.681380086
#> Sigma value:  0.84
#> Likelihood value: 119268.308954081
#> Sigma value:  0.69
#> Likelihood value: 114932.720250622
#> Sigma value:  0.61
#> Likelihood value: 112490.291409321
#> Sigma value:  0.53
#> Likelihood value: 109993.152353795
#> Sigma value:  0.45
#> Likelihood value: 107505.825401598
#> Sigma value:  0.37
#> Likelihood value: 105150.81136131
#> Sigma value:  0.29
#> Likelihood value: 103178.818294388
#> Sigma value:  0.21

Exploring the multi mode results

The results of RCTD multi mode are stored in @results, and are accessed as shown below. Each pixel has its own result object, which shows whether that pixel has converged, which cell types appear on the pixel, which cell types are determined as confident by RCTD, and the estimated weight of each cell type. For example, we examine the RCTD multi mode fit on the first pixel as below:

print('Examining results on first pixel / spot')
#> [1] "Examining results on first pixel / spot"
myRCTD <- readRDS(file.path(savedir,'myRCTD_visium_multi.rds'))
first_pixel <- myRCTD@results[[1]] # examine one pixel
print('check convergence:')
#> [1] "check convergence:"
print(first_pixel$conv_sub)
#> [1] TRUE
print('check cell types:')
#> [1] "check cell types:"
print(first_pixel$cell_type_list)
#> [1] "Polydendrocyte" "Denate"
print('check confidence:')
#> [1] "check confidence:"
print(first_pixel$conf_list)
#> Polydendrocyte         Denate 
#>           TRUE          FALSE
print('check weights')
#> [1] "check weights"
print(first_pixel$sub_weights)
#> Polydendrocyte         Denate 
#>      0.7477706      0.2522294