library(MOFA2)
library(tidyverse)
library(cowplot)
library(magrittr)

1 Load ST data set

We use the example data provided in the Seurat vignette, which contains spatial transcriptomics data on a slide of a mouse brain tissue generated using 10x visium.

library(Seurat)

## Registered S3 method overwritten by 'spatstat':
##   method     from
##   print.boxx cli

SeuratData::InstallData("stxBrain")

## Warning: The following packages are already installed and will not be
## reinstalled: stxBrain

brain <- SeuratData::LoadData("stxBrain", type = "anterior1")
brain <- Seurat::NormalizeData(brain, assay = "Spatial", verbose = FALSE)
brain <- Seurat::FindVariableFeatures(brain, assay = "Spatial")

2 Preprocess data for MEFISTO

Next, we prepare the data for input to MEFISTO. For this we need the expression data (using the 2000 top variable features) as well as the spatial coordinates of each sample (spot).

expression_data <- as.matrix(brain@assays$Spatial@data[VariableFeatures(brain),])

dim(expression_data)

## [1] 2000 2696

locs <- GetTissueCoordinates(brain)
locs <- tibble::rownames_to_column(locs, "sample") %>%
  gather(key = "covariate", value = "value", starts_with("image"))

head(locs %>% arrange(sample))

##               sample covariate    value
## 1 AAACAAGTATCTCCCA-1  imagerow 385.9725
## 2 AAACAAGTATCTCCCA-1  imagecol 438.9501
## 3 AAACACCAATAACTGC-1  imagerow 441.6351
## 4 AAACACCAATAACTGC-1  imagecol 143.9587
## 5 AAACAGAGCGACTCCT-1  imagerow 163.3735
## 6 AAACAGAGCGACTCCT-1  imagecol 410.4991

3 Create a MOFA object specifying spatial coordinates as covariates

To create a space-aware MOFA model (= MEFISTO) we provide MOFA the expression data as well as the spatial coordinates in set_covariates.

# create MOFA objects and specify the spatial coordinates as covariates
sm <- create_mofa(data = list(RNA = expression_data))

## Creating MOFA object from a list of matrices (features as rows, sample as columns)...

sm <- set_covariates(sm, locs)

4 Prepare training

Before training, we can specify various options for the model, the training and the data pre-processing. If no options are specified, the model will use the default options. See also get_default_data_options, get_default_model_options, get_default_training_options and get_default_mefisto_options to have a look at the various options and their defaults. Here, we in particular, specify that we want to use spare Gaussian process with a smaller number of inducing points which can speed up training.

model_opts <- get_default_model_options(sm)
model_opts$num_factors <- 4

n_inducing <- 1000 # We use 1000 inducing points to learn spatial covariance patterns
mefisto_opts <- get_default_mefisto_options(sm)
mefisto_opts$sparseGP <- TRUE
mefisto_opts$frac_inducing <- n_inducing / get_dimensions(sm)$N

train_opts <- get_default_training_options(sm)
train_opts$seed <- 2021

sm <- prepare_mofa(sm, training_options = train_opts,
                   model_options = model_opts,
                   mefisto_options = mefisto_opts)

## Checking data options...

## No data options specified, using default...

## Checking training options...

## Checking model options...

## Checking inference options for mefisto covariates...

Next, we train the model. As running the model can take some time, we here load a pre-trained model. This can be found here.

outfile = "data/ST_model.hdf5"

# sm <- run_mofa(sm, outfile = outfile)

5 Load a trained model

sm <- load_model(outfile)

6 Downstream analyses

6.1 Variance decomposition

First, we can take a look whether our factor are uncorrelated.

plot_factor_cor(sm)

6.2 Spatial factors

Next, we will have a look at the spatial patterns that are captured by each factor. We here rotate the tissue to obtain the same as orientation as in SpatialFeaturePlot below.

plot_factors_vs_cov(sm, rotate_y = TRUE)

6.3 Smoothness of factors

All of this factors seem to capture spatial patterns of variation that seems to vary smoothly along space to some extent. We can take a look at the smoothness score inferred by the model.

plot_smoothness(sm)

6.4 Weights

To inspect which genes underly these spatial patterns, we can take a look at the top weights per factor. For example, Factor 4 highlights Ttr, a marker of the choroid plexus.

plot_top_weights(sm, factors = 4)

To explore the expression patterns of these genes that are highlighted on the weights by MEFISTO, we can use Seurat’s SpatialFeaturePlot.

W <- get_weights(sm)[[1]]
top_weights <- rownames(W)[apply(abs(W), 2, which.max )]
list_seurat <- SpatialFeaturePlot(brain, features = top_weights,
                                combine = F)
gg_seurat <- plot_grid(plotlist = list_seurat, nrow = 1)
gg_seurat

7 SessionInfo

sessionInfo()

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Seurat_3.2.3     magrittr_2.0.1   cowplot_1.1.0    forcats_0.5.0   
##  [5] stringr_1.4.0    dplyr_1.0.2      purrr_0.3.4      readr_1.4.0     
##  [9] tidyr_1.1.2      tibble_3.0.4     ggplot2_3.3.2    tidyverse_1.3.0 
## [13] MOFA2_1.1.10     BiocStyle_2.18.1
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1              backports_1.2.1          
##   [3] corrplot_0.84             plyr_1.8.6               
##   [5] igraph_1.2.6              lazyeval_0.2.2           
##   [7] splines_4.0.3             listenv_0.8.0            
##   [9] scattermore_0.7           digest_0.6.27            
##  [11] htmltools_0.5.0           magick_2.5.2             
##  [13] fansi_0.4.1               panc8.SeuratData_3.0.2   
##  [15] tensor_1.5                cluster_2.1.0            
##  [17] ROCR_1.0-11               globals_0.14.0           
##  [19] modelr_0.1.8              matrixStats_0.57.0       
##  [21] colorspace_2.0-0          rvest_0.3.6              
##  [23] rappdirs_0.3.1            ggrepel_0.9.0            
##  [25] haven_2.3.1               xfun_0.19                
##  [27] stxBrain.SeuratData_0.1.1 crayon_1.3.4             
##  [29] jsonlite_1.7.2            spatstat_1.64-1          
##  [31] spatstat.data_1.7-0       survival_3.2-7           
##  [33] zoo_1.8-8                 glue_1.4.2               
##  [35] polyclip_1.10-0           gtable_0.3.0             
##  [37] leiden_0.3.6              DelayedArray_0.16.0      
##  [39] Rhdf5lib_1.12.0           future.apply_1.6.0       
##  [41] BiocGenerics_0.36.0       HDF5Array_1.18.0         
##  [43] abind_1.4-5               scales_1.1.1             
##  [45] pheatmap_1.0.12           mvtnorm_1.1-1            
##  [47] bmcite.SeuratData_0.3.0   DBI_1.1.0                
##  [49] pbmc3k.SeuratData_3.1.4   miniUI_0.1.1.1           
##  [51] Rcpp_1.0.5                viridisLite_0.3.0        
##  [53] xtable_1.8-4              reticulate_1.18          
##  [55] rsvd_1.0.3                stats4_4.0.3             
##  [57] htmlwidgets_1.5.3         httr_1.4.2               
##  [59] RColorBrewer_1.1-2        ellipsis_0.3.1           
##  [61] ica_1.0-2                 farver_2.0.3             
##  [63] pkgconfig_2.0.3           deldir_0.2-3             
##  [65] uwot_0.1.10               dbplyr_2.0.0             
##  [67] labeling_0.4.2            tidyselect_1.1.0         
##  [69] rlang_0.4.9               reshape2_1.4.4           
##  [71] later_1.1.0.1             munsell_0.5.0            
##  [73] cellranger_1.1.0          tools_4.0.3              
##  [75] cli_2.2.0                 generics_0.1.0           
##  [77] broom_0.7.3               ggridges_0.5.2           
##  [79] evaluate_0.14             fastmap_1.0.1            
##  [81] yaml_2.2.1                goftest_1.2-2            
##  [83] knitr_1.30                fs_1.5.0                 
##  [85] fitdistrplus_1.1-3        SeuratData_0.2.1         
##  [87] RANN_2.6.1                pbapply_1.4-3            
##  [89] future_1.21.0             nlme_3.1-151             
##  [91] mime_0.9                  xml2_1.3.2               
##  [93] compiler_4.0.3            rstudioapi_0.13          
##  [95] plotly_4.9.2.2            filelock_1.0.2           
##  [97] png_0.1-7                 spatstat.utils_1.17-0    
##  [99] reprex_0.3.0              stringi_1.5.3            
## [101] basilisk.utils_1.2.1      lattice_0.20-41          
## [103] Matrix_1.2-18             vctrs_0.3.6              
## [105] pillar_1.4.7              lifecycle_0.2.0          
## [107] rhdf5filters_1.2.0        BiocManager_1.30.10      
## [109] lmtest_0.9-38             RcppAnnoy_0.0.18         
## [111] data.table_1.13.4         irlba_2.3.3              
## [113] httpuv_1.5.4              patchwork_1.1.1          
## [115] R6_2.5.0                  bookdown_0.21            
## [117] promises_1.1.1            KernSmooth_2.23-18       
## [119] gridExtra_2.3             IRanges_2.24.1           
## [121] parallelly_1.22.0         codetools_0.2-18         
## [123] MASS_7.3-53               assertthat_0.2.1         
## [125] rhdf5_2.34.0              withr_2.3.0              
## [127] sctransform_0.3.2         S4Vectors_0.28.1         
## [129] mgcv_1.8-33               parallel_4.0.3           
## [131] hms_0.5.3                 rpart_4.1-15             
## [133] grid_4.0.3                basilisk_1.2.1           
## [135] rmarkdown_2.6             MatrixGenerics_1.2.0     
## [137] Rtsne_0.15                shiny_1.5.0              
## [139] lubridate_1.7.9.2

MEFISTO application to spatial transcriptomics data

2021-02-04