# Experiment
# Raw JSON of the 3D mammoth point cloud hosted in the PAIR-code
# understanding-umap repository.
PAIR_code_umap_mammoth_url <- "https://raw.githubusercontent.com/PAIR-code/understanding-umap/refs/heads/master/raw_data/mammoth_3d.json"
library(ggdims)
# Download and parse the JSON, then coerce the result to a data frame.
# Later code refers to its columns as V1, V2 and V3.
mammoth_df <- as.data.frame(
  jsonlite::fromJSON(PAIR_code_umap_mammoth_url)
)
# Give every row of the layer data its own randomly drawn fill color.
#
# data:   layer data frame; any existing `fill` column is overwritten.
# params: unused; present because it is part of the `setup_data` signature
#         that GeomConfetti plugs this function into.
#
# Returns `data` with a `fill` column holding one built-in color name per row,
# drawn with replacement from `colors()`.
setup_data_confetti <- function(data, params) {
  n_points <- nrow(data)
  palette <- colors()
  data$fill <- sample(palette, n_points, replace = TRUE)
  data
}
# Default aesthetics for confetti points. These are layered on top of
# GeomPoint's own defaults below; the border color is taken from the theme
# via from_theme(paper).
new_mapping <- aes(
  fill = "grey",
  shape = 21,
  size = 4,
  color = from_theme(paper),
  stroke = .2,
  alpha = .5
)

# A GeomPoint variant: same drawing code, but with the defaults above and a
# setup_data step that assigns each row a random fill color.
GeomConfetti <- ggproto(
  "GeomConfetti",
  GeomPoint,
  default_aes = modifyList(GeomPoint$default_aes, new_mapping),
  setup_data = setup_data_confetti
)

# User-facing layer function generated from the ggproto object.
geom_confetti <- make_constructor(GeomConfetti)
library(ggplot2)
# Quick check of the new geom on the built-in `cars` data.
ggplot(data = cars) +
  aes(x = speed, y = dist) +
  geom_confetti()

# Pass the plotting code below, as a string, to ggram::ggram() together with
# a title, subtitle and caption. The `#<<` markers inside the string are kept
# as-is (presumably they flag lines for highlighting; confirm in ggram docs).
# Fixes the "dimentional" typo and the caption grammar in the displayed text.
'
mammoth_df <-
PAIR_code_umap_mammoth_url |> #<<
jsonlite::fromJSON() |>
as.data.frame()
mammoth_df |> #<<
ggplot() +
aes(x = V1, y = V2) +
geom_confetti() ' |>
  ggram::ggram(
    code = _,
    widths = c(1, 1.3),
    "First two dimensions of the 3D Smithsonian Mammoth dataset from Pair code, 10000 observations 🦣",
    subtitle = "The now-canonical mammoth dataset can be a fun and intuitive first-taste when exploring dimension reduction techniques,\nfor going from still-familiar 3 dimensional data to the lower-dim representations.",
    caption = "geom_confetti is a variant of geom_point whose colors are randomly sampled from R colors(), which 'returns the built-in color names'"
  )
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_hline()`).

# Pass the code below, as a string, to ggram::ggram(). The code builds a
# confetti scatter of V1 vs V2 and a geom_pca() plot over V1:V3, then combines
# them with `+` (patchwork, per the inline note). The `#<<` markers are kept
# as-is (presumably highlight flags; confirm in ggram docs).
# Fixes the "dimentional" typo and the caption grammar in the displayed text.
'
mammoth_df <- #<<
PAIR_code_umap_mammoth_url |> #<<
jsonlite::fromJSON() |> #<<
as.data.frame() #<<
high_dim <- mammoth_df |>
ggplot() +
aes(x = V1, y = V2) +
geom_confetti()
library(ggdims) #<<
low_dim <- mammoth_df |>
ggplot() +
aes(dims = dims(V1:V3)) + #<<
geom_pca(fill = "cadetblue2") #<<
high_dim + low_dim # w/ patchwork
' |>
  ggram::ggram(
    code = _,
    widths = c(.8, 2),
    "First two dimensions of the 3D Smithsonian Mammoth dataset from Pair code, 10000 observations 🦣",
    subtitle = "The now-canonical mammoth dataset can be a fun and intuitive first-taste when exploring dimension reduction techniques,\nfor going from still-familiar 3 dimensional data to the lower-dim representations.",
    caption = "geom_confetti is a variant of geom_point whose colors are randomly sampled from R colors(), which 'returns the built-in color names'"
  )
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0. Please use
## `as_label()` or `as_name()` instead.
## This warning is displayed once every 8 hours.
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_hline()`).

# Change the session-wide defaults for point layers before rendering the
# plain geom_point() version of the plot.
update_geom_defaults("point", aes(fill = "grey", color = "white", size = 1))

# Pass the code below, as a string, to ggram::ggram(). It samples one color
# name per row of mammoth_df and supplies the vector as the point fill.
# Fixes the "dimentional" typo in the displayed subtitle.
'
colors_random <-
sample(
x = colors(),
size = nrow(mammoth_df),
replace = TRUE
)
ggplot(data = mammoth_df) +
aes(x = V1, y = V2) +
geom_point(
alpha = .5,
fill = colors_random #<<
)' |>
  ggram::ggram(
    code = _,
    widths = c(1, 1.3),
    "First two dimensions of the 3D Smithsonian Mammoth dataset, 10000 observations 🦣",
    subtitle = "The now-canonical mammoth dataset can be a fun first-taste when exploring dimension reduction techniques, for going from familiar and friendly 3 dimensional data to the lower-dim representations.",
    caption = "Point colors randomly sampled from R colors() which 'returns the built-in color names'"
  ) # +
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_tile()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_hline()`).

# annotate("label", x = I(.2), y = I(.2), size = 1.7,
# label = "ggplot2 is a system for declaratively creating graphics, based on The Grammar of Graphics. You provide the data, tell ggplot2 how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details." |> str_wrap(30))
# library(patchwork)
# patchwork::plot_annotation()
# One randomly drawn built-in color name per row of mammoth_df, sampled with
# replacement. The same vector is reused by the plots that follow.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
colors_random <- colors() |>
  sample(
    size = nrow(mammoth_df),
    replace = TRUE
  )
# Three views of the same three columns (V1:V3), one per ggdims geom.
# fill = I(colors_random) passes the color names through as-is, so each point
# has the same color in every panel.

# PCA panel.
p1 <- ggplot(mammoth_df) +
  aes(dims = dims(V1:V3), fill = I(colors_random)) +
  geom_pca() +
  labs(title = "PCA")

# t-SNE panel.
p2 <- ggplot(mammoth_df) +
  aes(dims = dims(V1:V3), fill = I(colors_random)) +
  geom_tsne() +
  labs(title = "t-SNE")

# UMAP panel.
p3 <- ggplot(mammoth_df) +
  aes(dims = dims(V1:V3), fill = I(colors_random)) +
  geom_umap() +
  labs(title = "UMAP")

# Combine the three plots with `+` (presumably via patchwork, which must be
# attached for this to compose whole plots; verify).
p1 + p2 + p3
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## ℹ The deprecated feature was likely used in the ggdims package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## New names:
## • `V3` -> `V3...3`
## • `V3` -> `V3...7`
