Manifold learning in R - Basic Syntax

Jackson Kwok

2020-04-07

# Global chunk defaults: echo the code, evaluate it, and fix the figure size.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
knitr::opts_chunk$set(echo = TRUE, eval = TRUE, fig.width = 6, fig.height = 6)

The following is a quick reference on how to call dimensionality reduction functions in R. It covers PCA, MDS, Isomap, Locally Linear Embedding (LLE), Diffusion Map, t-SNE, Kernel PCA (KPCA), Stochastic Proximity Embedding (SPE), Laplacian Eigenmaps, Hessian LLE and Local Tangent Space Alignment (LTSA).

First, we simulate some data: points sampled from a Swiss roll.

library(maniTools)
# Simulation settings reused by the dimensionality reduction calls below.
num_pts <- 600  # number of simulated points
d <- 2          # target dimension
k <- 8          # k nearest neighbors

# Simulate the Swiss roll and inspect it with the 3D plotting helper.
sim_data <- swiss_roll(num_pts)
plotly_3D(sim_data)

Now we do dimensionality reduction. We begin with some classical techniques.

# PCA (on centered and scaled data)
# The rotation is fitted on the standardised data, so the scores must come
# from that same data. Take them from the prcomp result (`x`) instead of
# multiplying the raw data by the rotation matrix, and keep the first `d`
# components rather than a hard-coded 1:2.
pca_dr <- sim_data$data %>% center_and_standardise() %>% prcomp()
proj_data <- pca_dr$x[, seq_len(d), drop = FALSE]
plotly_2D(proj_data, sim_data$colors) %>% plotly::layout(title = "PCA")

# Classical MDS applied to the pairwise distance matrix of the data
proj_data <- sim_data$data %>%
  dist() %>%
  cmdscale(k = d)
plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "MDS")

Next, we look at some local nonlinear techniques.

# Locally Linear Embedding (LLE) into `d` dimensions with `k` neighbours
proj_data <- LLE2(
  sim_data$data,
  dim = d,
  k = k
)

## Computing distance matrix ... done
## Computing low dimensional emmbedding (using 8 nearest neighbours)... done

plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "LLE")

# Hessian LLE: the embedding is read from the `projection` element
proj_data <- Hessian_LLE(
  sim_data$data,
  k = k,
  d = d
)$projection
plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "Hessian LLE")

# Laplacian Eigenmaps: the embedding is read from the `eigenvectors` element
proj_data <- Laplacian_Eigenmaps(
  sim_data$data,
  k = k,
  d = d
)$eigenvectors
plotly::layout(
  plotly_2D(proj_data, sim_data$colors),
  title = "Laplacian Eigenmaps"
)

# Local Tangent Space Alignment (LTSA)
proj_data <- Local_TSA(
  sim_data$data,
  k = k,
  d = d
)
plotly::layout(
  plotly_2D(proj_data, sim_data$colors),
  title = "Local Tangent Space Alignment"
)

Finally, we have global nonlinear techniques.

# Isomap
# RDRToolbox::Isomap returns a list with one embedding per requested
# dimension, named "dim<n>". Build the element name from `d` instead of
# hard-coding "dim2" so the chunk still works when the target dimension changes.
proj_data <- RDRToolbox::Isomap(
  sim_data$data,
  dims = d,
  k = k
)[[paste0("dim", d)]]

## Computing distance matrix ... done
## Building graph with shortest paths (using 8 nearest neighbours) ... done
## Computing low dimensional embedding ... done
## number of samples: 600
## reduction from 3 to 2 dimensions
## number of connected components in graph: 1

plotly_2D(proj_data, sim_data$colors) %>% plotly::layout(title = "ISOMAP")

# Diffusion Map computed from the pairwise distance matrix of the data
proj_data <- diffusionMap::diffuse(
  dist(sim_data$data),
  neigen = d
)$X

## Performing eigendecomposition
## Computing Diffusion Coordinates
## Elapsed time: 0.198 seconds

plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "Diffusion Map")

# t-SNE embedding into `d` dimensions
proj_data <- tsne::tsne(
  sim_data$data,
  k = d
)

## sigma summary: Min. : 0.364481595829211 |1st Qu. : 0.436000083420641 |Median : 0.458631148029101 |Mean : 0.460088497823795 |3rd Qu. : 0.481998126457497 |Max. : 0.551140784632338 |
## Epoch: Iteration #100 error is: 12.2295022687842
## Epoch: Iteration #200 error is: 0.410916476005275
## Epoch: Iteration #300 error is: 0.37602565630267
## Epoch: Iteration #400 error is: 0.368510975964706
## Epoch: Iteration #500 error is: 0.365314175513285
## Epoch: Iteration #600 error is: 0.3641951935611
## Epoch: Iteration #700 error is: 0.36354591840718
## Epoch: Iteration #800 error is: 0.363130301806567
## Epoch: Iteration #900 error is: 0.362844894085971
## Epoch: Iteration #1000 error is: 0.362637929708366

plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "t-SNE")

# Stochastic Proximity Embedding (SPE); coordinates are read from `x`
proj_data <- spe::spe(
  sim_data$data,
  edim = d
)$x
plotly::layout(plotly_2D(proj_data, sim_data$colors), title = "SPE")

# Kernel PCA (Laplacian kernel)
# The `pcv` slot holds the principal component vectors, not the embedded
# points. The data projected onto the components lives in the `rotated` slot,
# so that is what gets plotted.
proj_data <- kernlab::kpca(
  sim_data$data,
  kernel = "laplacedot",
  features = d
)@rotated
plotly_2D(proj_data, sim_data$colors) %>% plotly::layout(title = "Kernel PCA")