Intro Thoughts

Status Quo

library(tidyverse)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.4.1 ──
## ✔ broom        1.0.11     ✔ rsample      1.3.2 
## ✔ dials        1.4.2      ✔ tailor       0.1.0 
## ✔ infer        1.1.0      ✔ tune         2.0.1 
## ✔ modeldata    1.5.1      ✔ workflows    1.3.0 
## ✔ parsnip      1.4.1      ✔ workflowsets 1.1.1 
## ✔ recipes      1.3.1      ✔ yardstick    1.3.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
library(embed)
library(ggdims)

Experiment

dimred_recipes_layout <- function(data = data, step_dimred = step_pca){

  recipe(~ ., data = data) %>%
    step_normalize(all_predictors()) %>%
    step_dimred(all_predictors()) |> 
    prep() |> 
    juice() 
  
}

iris |> 
  select(Sepal.Length:Petal.Width) |> 
  dimred_recipes_layout()
## # A tibble: 150 × 4
##      PC1     PC2     PC3      PC4
##    <dbl>   <dbl>   <dbl>    <dbl>
##  1 -2.26 -0.478   0.127   0.0241 
##  2 -2.07  0.672   0.234   0.103  
##  3 -2.36  0.341  -0.0441  0.0283 
##  4 -2.29  0.595  -0.0910 -0.0657 
##  5 -2.38 -0.645  -0.0157 -0.0358 
##  6 -2.07 -1.48   -0.0269  0.00659
##  7 -2.44 -0.0475 -0.334  -0.0367 
##  8 -2.23 -0.222   0.0884 -0.0245 
##  9 -2.33  1.11   -0.145  -0.0268 
## 10 -2.18  0.467   0.253  -0.0398 
## # ℹ 140 more rows
iris |> 
  select(Sepal.Length:Petal.Width) |> 
  dimred_recipes_layout(step_dimred = step_umap)
## # A tibble: 150 × 2
##    UMAP1 UMAP2
##    <dbl> <dbl>
##  1  13.5 0.542
##  2  13.9 3.95 
##  3  14.2 2.94 
##  4  14.2 3.23 
##  5  14.1 0.675
##  6  15.3 0.463
##  7  13.9 1.78 
##  8  13.2 1.47 
##  9  14.7 3.74 
## 10  13.7 3.63 
## # ℹ 140 more rows
step_umap_1d <- function(recipe, ...){step_umap(recipe, ..., num_comp = 1)}

iris |> select(Sepal.Length:Petal.Width) |> 
  dimred_recipes_layout(step_dimred = step_umap_1d)
## # A tibble: 150 × 1
##     UMAP1
##     <dbl>
##  1 -12.8 
##  2  -7.90
##  3  -8.82
##  4  -8.67
##  5 -11.3 
##  6 -12.3 
##  7  -7.43
##  8  -9.74
##  9  -7.75
## 10  -8.38
## # ℹ 140 more rows
iris |> select(Sepal.Length:Petal.Width) |> 
  dimred_recipes_layout(step_dimred = step_isomap)
## 2026-03-16 14:43:10.796508: Isomap START
## 2026-03-16 14:43:10.796766: constructing knn graph
## 2026-03-16 14:43:10.806516: calculating geodesic distances
## 2026-03-16 14:43:10.815038: Classical Scaling
## # A tibble: 150 × 4
##    Isomap1 Isomap2 Isomap3 Isomap4
##      <dbl>   <dbl>   <dbl>   <dbl>
##  1    6.96   -1.39  -1.60   0.842 
##  2    4.90   -1.77  -0.982  0.246 
##  3    5.60   -1.67  -1.18   0.412 
##  4    5.18   -1.75  -1.04   0.279 
##  5    7.33   -1.34  -1.70   0.931 
##  6    8.71   -1.07  -2.13   1.37  
##  7    6.40   -1.56  -1.41   0.620 
##  8    6.52   -1.48  -1.46   0.707 
##  9    4.34   -1.93  -0.783  0.0144
## 10    5.31   -1.70  -1.10   0.355 
## # ℹ 140 more rows
iris |> select(Sepal.Length:Petal.Width) |> 
  dimred_recipes_layout(step_dimred = step_isomap)
## 2026-03-16 14:43:10.863716: Isomap START
## 2026-03-16 14:43:10.863888: constructing knn graph
## 2026-03-16 14:43:10.866424: calculating geodesic distances
## 2026-03-16 14:43:10.873998: Classical Scaling
## # A tibble: 150 × 4
##    Isomap1 Isomap2 Isomap3 Isomap4
##      <dbl>   <dbl>   <dbl>   <dbl>
##  1    6.96   -1.39  -1.60   0.842 
##  2    4.90   -1.77  -0.982  0.246 
##  3    5.60   -1.67  -1.18   0.412 
##  4    5.18   -1.75  -1.04   0.279 
##  5    7.33   -1.34  -1.70   0.931 
##  6    8.71   -1.07  -2.13   1.37  
##  7    6.40   -1.56  -1.41   0.620 
##  8    6.52   -1.48  -1.46   0.707 
##  9    4.34   -1.93  -0.783  0.0144
## 10    5.31   -1.70  -1.10   0.355 
## # ℹ 140 more rows
compute_tidy_dimred <- function(data, scales, step_dimred = step_pca){
  
    data_for_reduction <- data_vars_unpack(data)
    clean_data <- remove_missing(bind_cols(data_for_reduction, 
        data))
    set.seed(1345)
    bind_cols(dimred_recipes_layout(clean_data[names(data_for_reduction)],
                                    step_dimred), 
        clean_data)
  
}


compute_pca <- function (data, scales) {

  compute_tidy_dimred(data, scales)
  
}

compute_umap <- function (data, scales, num_comp = 2, neighbors = 15, epochs = NULL) {

  step_umap_spec <- function(recipe, ...){
    step_umap(recipe, ..., num_comp = num_comp, neighbors = neighbors, epochs = epochs)
    }

  compute_tidy_dimred(data, scales, step_dimred = step_umap_spec)
  
}


compute_umap_1d <- function (data, scales, neighbors = 15, epochs = NULL) {

  compute_umap(data, scales, num_comp = 1, neighbors = neighbors, epochs = epochs)
  
}

compute_isomap <- function (data, scales) {

  compute_tidy_dimred(data, scales, step_dimred = step_isomap)
  
}
StatUmap <- ggproto("StatUmap", Stat, 
                    compute_panel = compute_umap, 
                    default_aes = aes(x = after_stat(UMAP1), 
                                      y = after_stat(UMAP2)))

StatUmap1d <- ggproto("StatUmap1d", Stat, 
                    compute_panel = compute_umap_1d, 
                    default_aes = aes(x = after_stat(UMAP1)))

StatPca <- ggproto("StatPca", Stat, 
                    compute_panel = compute_pca, 
                    default_aes = aes(x = after_stat(PC1), 
                                      y = after_stat(PC2)))


StatIsomap <- ggproto("StatPca", Stat, 
                    compute_panel = compute_isomap, 
                    default_aes = aes(x = after_stat(Isomap1), 
                                      y = after_stat(Isomap2)))
geom_pca0 <- make_constructor(GeomPoint, stat = StatPca)
geom_umap0 <- make_constructor(GeomPoint, stat = StatUmap)
geom_umap_1d_0 <- make_constructor(GeomRug, stat = StatUmap1d)
geom_isomap0 <- make_constructor(GeomPoint, stat = StatIsomap)


geom_isomap <- function (..., 
                      update_spec = dims_expand(), 
                      layer_spec = geom_isomap0(...)) {
    list(update_spec, layer_spec)
}


geom_pca <- function (..., 
                      update_spec = dims_expand(), 
                      layer_spec = geom_pca0(...)) {
    list(update_spec, layer_spec)
}

geom_umap <- function (..., 
                      update_spec = dims_expand(), 
                      layer_spec = geom_umap0(...)) {
    list(update_spec, layer_spec)
}

geom_umap_1d <- function (..., 
                      update_spec = dims_expand(), 
                      layer_spec = geom_umap_1d_0(...)) {
    list(update_spec, layer_spec)
}

ggplot(iris) + 
  aes(dims = dims(Sepal.Length:Petal.Width)) + 
  geom_pca() + 
  aes(color = Species)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0. Please use
## `as_label()` or `as_name()` instead.
## This warning is displayed once every 8 hours.

ggplot(iris) +
  aes(dims = dims(Sepal.Length:Petal.Width)) +
  geom_umap(color = "black")

ggplot(penguins) +
  aes(dims = dims(bill_length_mm:body_mass_g)) +
  geom_umap(epochs = 5) +
  aes(color = species)
## Warning: Removed 2 rows containing missing values or values outside the scale
## range.

ggplot(penguins) +
  aes(dims = dims(bill_length_mm:body_mass_g)) +
  geom_umap(epochs = 15) +
  aes(color = species)
## Warning: Removed 2 rows containing missing values or values outside the scale
## range.

ggplot(penguins) +
  aes(dims = dims(bill_length_mm:body_mass_g)) +
  geom_umap(epochs = 100) +
  aes(color = species)
## Warning: Removed 2 rows containing missing values or values outside the scale
## range.

ggplot(iris)  + 
  aes(x = Sepal.Length, y = Sepal.Width) + 
  geom_point() + 
  aes(color = Species) 

ggplot(iris)  + 
  aes(dims = dims(Sepal.Length, Sepal.Width)) +
  geom_umap_1d() + 
  aes(color = Species) 

ggplot(iris)  + 
  aes(dims = dims(Sepal.Length, Sepal.Width)) +
  geom_isomap() + 
  aes(color = Species) 
## 2026-03-16 14:43:14.013192: Isomap START
## 2026-03-16 14:43:14.013405: constructing knn graph
## 2026-03-16 14:43:14.015778: calculating geodesic distances
## 2026-03-16 14:43:14.02409: Classical Scaling

ggplot(iris)  + 
  aes(dims = dims(Sepal.Length, Sepal.Width)) +
  geom_umap() + 
  aes(color = Species) 

ggplyr:::last_plot_wipe_last() +
  geom_umap(neighbors = 12)

ggplyr:::last_plot_wipe_last() +
  geom_umap(neighbors = 5)

ggplot(penguins) +
  aes(dims = dims(bill_length_mm:body_mass_g)) +
  aes(color = species) ->
p0

p0 + geom_umap(epochs = 1) ->
p; p0 + geom_umap(epochs = 2) ->
p; p0 + geom_umap(epochs = 3)  ->
p; p0 + geom_umap(epochs = 5)  ->
p; p0 + geom_umap(epochs = 8)  ->
p; p0 + geom_umap(epochs = 13)  ->
p; p0 + geom_umap(epochs = 21)  ->
p; p0 + geom_umap(epochs = 34)  ->
p; p0 + geom_umap(epochs = 55)  ->
p; p0 + geom_umap(epochs = 89)
## Warning: Removed 2 rows containing missing values or values outside the scale
## range.