library(MOFA2)
library(tidyverse)
library(cowplot)
library(magrittr)
We use the example data provided in the Seurat vignette, which contains spatial transcriptomics data on a slide of a mouse brain tissue generated using 10x visium.
library(Seurat)
## Registered S3 method overwritten by 'spatstat':
## method from
## print.boxx cli
SeuratData::InstallData("stxBrain")
## Warning: The following packages are already installed and will not be
## reinstalled: stxBrain
brain <- SeuratData::LoadData("stxBrain", type = "anterior1")
brain <- Seurat::NormalizeData(brain, assay = "Spatial", verbose = FALSE)
brain <- Seurat::FindVariableFeatures(brain, assay = "Spatial")
Next, we prepare the data for input to MEFISTO. For this we need the expression data (using the 2000 top variable features) as well as the spatial coordinates of each sample (spot).
expression_data <- as.matrix(brain@assays$Spatial@data[VariableFeatures(brain),])
dim(expression_data)
## [1] 2000 2696
locs <- GetTissueCoordinates(brain)
locs <- tibble::rownames_to_column(locs, "sample") %>%
gather(key = "covariate", value = "value", starts_with("image"))
head(locs %>% arrange(sample))
## sample covariate value
## 1 AAACAAGTATCTCCCA-1 imagerow 385.9725
## 2 AAACAAGTATCTCCCA-1 imagecol 438.9501
## 3 AAACACCAATAACTGC-1 imagerow 441.6351
## 4 AAACACCAATAACTGC-1 imagecol 143.9587
## 5 AAACAGAGCGACTCCT-1 imagerow 163.3735
## 6 AAACAGAGCGACTCCT-1 imagecol 410.4991
To create a space-aware MOFA model (= MEFISTO) we provide MOFA the expression data as well as the spatial coordinates in set_covariates
.
# create MOFA objects and specify the spatial coordinates as covariates
sm <- create_mofa(data = list(RNA = expression_data))
## Creating MOFA object from a list of matrices (features as rows, sample as columns)...
sm <- set_covariates(sm, locs)
Before training, we can specify various options for the model, the training and the data pre-processing. If no options are specified, the model will use the default options. See also get_default_data_options
, get_default_model_options
, get_default_training_options
and get_default_mefisto_options
to have a look at the various options and their defaults. Here, we in particular, specify that we want to use spare Gaussian process with a smaller number of inducing points which can speed up training.
model_opts <- get_default_model_options(sm)
model_opts$num_factors <- 4
n_inducing <- 1000 # We use 1000 inducing points to learn spatial covariance patterns
mefisto_opts <- get_default_mefisto_options(sm)
mefisto_opts$sparseGP <- TRUE
mefisto_opts$frac_inducing <- n_inducing / get_dimensions(sm)$N
train_opts <- get_default_training_options(sm)
train_opts$seed <- 2021
sm <- prepare_mofa(sm, training_options = train_opts,
model_options = model_opts,
mefisto_options = mefisto_opts)
## Checking data options...
## No data options specified, using default...
## Checking training options...
## Checking model options...
## Checking inference options for mefisto covariates...
Next, we train the model. As running the model can take some time, we here load a pre-trained model. This can be found here.
outfile = "data/ST_model.hdf5"
# sm <- run_mofa(sm, outfile = outfile)
sm <- load_model(outfile)
First, we can take a look whether our factor are uncorrelated.
plot_factor_cor(sm)
Next, we will have a look at the spatial patterns that are captured by each factor.
We here rotate the tissue to obtain the same as orientation as in SpatialFeaturePlot
below.
plot_factors_vs_cov(sm, rotate_y = TRUE)
All of this factors seem to capture spatial patterns of variation that seems to vary smoothly along space to some extent. We can take a look at the smoothness score inferred by the model.
plot_smoothness(sm)
To inspect which genes underly these spatial patterns, we can take a look at the top weights per factor. For example, Factor 4 highlights Ttr, a marker of the choroid plexus.
plot_top_weights(sm, factors = 4)
To explore the expression patterns of these genes that are highlighted on the weights by MEFISTO, we can use Seurat’s SpatialFeaturePlot
.
W <- get_weights(sm)[[1]]
top_weights <- rownames(W)[apply(abs(W), 2, which.max )]
list_seurat <- SpatialFeaturePlot(brain, features = top_weights,
combine = F)
gg_seurat <- plot_grid(plotlist = list_seurat, nrow = 1)
gg_seurat
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Seurat_3.2.3 magrittr_2.0.1 cowplot_1.1.0 forcats_0.5.0
## [5] stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 readr_1.4.0
## [9] tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0
## [13] MOFA2_1.1.10 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1
## [3] corrplot_0.84 plyr_1.8.6
## [5] igraph_1.2.6 lazyeval_0.2.2
## [7] splines_4.0.3 listenv_0.8.0
## [9] scattermore_0.7 digest_0.6.27
## [11] htmltools_0.5.0 magick_2.5.2
## [13] fansi_0.4.1 panc8.SeuratData_3.0.2
## [15] tensor_1.5 cluster_2.1.0
## [17] ROCR_1.0-11 globals_0.14.0
## [19] modelr_0.1.8 matrixStats_0.57.0
## [21] colorspace_2.0-0 rvest_0.3.6
## [23] rappdirs_0.3.1 ggrepel_0.9.0
## [25] haven_2.3.1 xfun_0.19
## [27] stxBrain.SeuratData_0.1.1 crayon_1.3.4
## [29] jsonlite_1.7.2 spatstat_1.64-1
## [31] spatstat.data_1.7-0 survival_3.2-7
## [33] zoo_1.8-8 glue_1.4.2
## [35] polyclip_1.10-0 gtable_0.3.0
## [37] leiden_0.3.6 DelayedArray_0.16.0
## [39] Rhdf5lib_1.12.0 future.apply_1.6.0
## [41] BiocGenerics_0.36.0 HDF5Array_1.18.0
## [43] abind_1.4-5 scales_1.1.1
## [45] pheatmap_1.0.12 mvtnorm_1.1-1
## [47] bmcite.SeuratData_0.3.0 DBI_1.1.0
## [49] pbmc3k.SeuratData_3.1.4 miniUI_0.1.1.1
## [51] Rcpp_1.0.5 viridisLite_0.3.0
## [53] xtable_1.8-4 reticulate_1.18
## [55] rsvd_1.0.3 stats4_4.0.3
## [57] htmlwidgets_1.5.3 httr_1.4.2
## [59] RColorBrewer_1.1-2 ellipsis_0.3.1
## [61] ica_1.0-2 farver_2.0.3
## [63] pkgconfig_2.0.3 deldir_0.2-3
## [65] uwot_0.1.10 dbplyr_2.0.0
## [67] labeling_0.4.2 tidyselect_1.1.0
## [69] rlang_0.4.9 reshape2_1.4.4
## [71] later_1.1.0.1 munsell_0.5.0
## [73] cellranger_1.1.0 tools_4.0.3
## [75] cli_2.2.0 generics_0.1.0
## [77] broom_0.7.3 ggridges_0.5.2
## [79] evaluate_0.14 fastmap_1.0.1
## [81] yaml_2.2.1 goftest_1.2-2
## [83] knitr_1.30 fs_1.5.0
## [85] fitdistrplus_1.1-3 SeuratData_0.2.1
## [87] RANN_2.6.1 pbapply_1.4-3
## [89] future_1.21.0 nlme_3.1-151
## [91] mime_0.9 xml2_1.3.2
## [93] compiler_4.0.3 rstudioapi_0.13
## [95] plotly_4.9.2.2 filelock_1.0.2
## [97] png_0.1-7 spatstat.utils_1.17-0
## [99] reprex_0.3.0 stringi_1.5.3
## [101] basilisk.utils_1.2.1 lattice_0.20-41
## [103] Matrix_1.2-18 vctrs_0.3.6
## [105] pillar_1.4.7 lifecycle_0.2.0
## [107] rhdf5filters_1.2.0 BiocManager_1.30.10
## [109] lmtest_0.9-38 RcppAnnoy_0.0.18
## [111] data.table_1.13.4 irlba_2.3.3
## [113] httpuv_1.5.4 patchwork_1.1.1
## [115] R6_2.5.0 bookdown_0.21
## [117] promises_1.1.1 KernSmooth_2.23-18
## [119] gridExtra_2.3 IRanges_2.24.1
## [121] parallelly_1.22.0 codetools_0.2-18
## [123] MASS_7.3-53 assertthat_0.2.1
## [125] rhdf5_2.34.0 withr_2.3.0
## [127] sctransform_0.3.2 S4Vectors_0.28.1
## [129] mgcv_1.8-33 parallel_4.0.3
## [131] hms_0.5.3 rpart_4.1-15
## [133] grid_4.0.3 basilisk_1.2.1
## [135] rmarkdown_2.6 MatrixGenerics_1.2.0
## [137] Rtsne_0.15 shiny_1.5.0
## [139] lubridate_1.7.9.2