Status Quo
library(tidyverse)
compute_fill_collapse <- function(data, scales){
if(is.null(data$fill)){data$fill <- 1}
data |>
summarise(fill = sum(fill),
summary = sum(fill))
}
StatFillCollapse <- ggproto("StatFillCollapse",
Stat,
compute_group = compute_fill_collapse,
default_aes = aes(fill = after_stat(fill)))
set.seed(12346)
tibble(x = sample(c("North", "South"), 100, replace = T, prob = c(.3,.7)),
y = sample(c("Carolina", "Dakota"), 100, replace = T, prob = c(.3,.7)),
household_size = sample(1:7, 100, replace = T, prob = 7:1)) ->
df
df
## # A tibble: 100 × 3
## x y household_size
## <chr> <chr> <int>
## 1 North Dakota 3
## 2 South Dakota 2
## 3 South Dakota 5
## 4 South Carolina 3
## 5 North Carolina 4
## 6 North Dakota 2
## 7 South Carolina 6
## 8 South Dakota 1
## 9 South Dakota 7
## 10 South Carolina 2
## # ℹ 90 more rows
df |>
summarise(agg = sum(household_size), .by = c(x,y))
## # A tibble: 4 × 3
## x y agg
## <chr> <chr> <int>
## 1 North Dakota 70
## 2 South Dakota 169
## 3 South Carolina 72
## 4 North Carolina 23
ggplot(df) +
aes(x = x, y = y, fill = household_size) +
geom_point(shape = 21, size = 15, position = "jitter") +
labs(title = "'depth' is shown via jitter")

layer_data() |> nrow()
## [1] 100
# superficial fill, overplotting
ggplot(df) +
aes(x = x, y = y, fill = household_size) +
geom_point(shape = 21, size = 15)

layer_data() |> nrow()
## [1] 100
# deep fill, collapsing by group
ggplot(df) +
aes(x = x, y = y, fill = household_size) +
geom_point(stat = StatFillCollapse, shape = 21, size = 15)

layer_data()
## fill summary x y PANEL group shape colour size alpha stroke
## 1 #132B43 23 1 1 1 1 21 black 15 NA 0.5
## 2 #275378 70 1 2 1 2 21 black 15 NA 0.5
## 3 #28547B 72 2 1 1 3 21 black 15 NA 0.5
## 4 #56B1F7 169 2 2 1 4 21 black 15 NA 0.5
# uh-oh, log transformation not doing what might want
last_plot() +
scale_fill_viridis_c(transform = "log10")

layer_data()
## fill summary x y PANEL group shape colour size alpha stroke
## 1 #440154 3.107210 1 1 1 1 21 black 15 NA 0.5
## 2 #37678C 9.417096 1 2 1 2 21 black 15 NA 0.5
## 3 #36688C 9.512071 2 1 1 3 21 black 15 NA 0.5
## 4 #FDE725 22.017303 2 2 1 4 21 black 15 NA 0.5
# which is
# superficial fill with jitter
df |>
group_by(x = x, y = y) |>
summarise(z = sum(household_size)) ->
precompute
## `summarise()` has grouped output by 'x'. You can override using the `.groups`
## argument.
precompute |>
ggplot() +
aes(x = x, y = y, fill = z) +
geom_point(shape = 21, size = 15)

last_plot() +
scale_fill_viridis_c(transform = "log10")

# so some variations on StatFillCollase
# no default aes
StatFillCollapse$default_aes <- aes()
# gives us discrete scale
ggplot(df) +
aes(x = x, y = y, fill = household_size) +
geom_point(stat = StatFillCollapse, shape = 21, size = 15)

last_plot() +
scale_fill_viridis_c(transform = "log")

StatFillCollapse$default_aes <- aes(fill = after_stat(summary))
ggplot(df) +
aes(x = x, y = y, fill = household_size) + labs(fill = "agg household size") +
geom_point(stat = StatFillCollapse, shape = 21, size = 15)

last_plot() +
scale_fill_viridis_c(transform = "log")
