Intro Thoughts

Status Quo

library(tidyverse)

compute_fill_collapse <- function(data, scales){
  
  if(is.null(data$fill)){data$fill <- 1}
  
  data |> 
    summarise(fill = sum(fill),
              summary = sum(fill))
  
}


StatFillCollapse <- ggproto("StatFillCollapse", 
                            Stat,
                            compute_group = compute_fill_collapse,
                            default_aes = aes(fill = after_stat(fill)))


set.seed(12346)

tibble(x = sample(c("North", "South"), 100, replace = T, prob = c(.3,.7)), 
       y = sample(c("Carolina", "Dakota"), 100, replace = T, prob = c(.3,.7)),
       household_size = sample(1:7, 100, replace = T, prob = 7:1)) ->
df

df
## # A tibble: 100 × 3
##    x     y        household_size
##    <chr> <chr>             <int>
##  1 North Dakota                3
##  2 South Dakota                2
##  3 South Dakota                5
##  4 South Carolina              3
##  5 North Carolina              4
##  6 North Dakota                2
##  7 South Carolina              6
##  8 South Dakota                1
##  9 South Dakota                7
## 10 South Carolina              2
## # ℹ 90 more rows
df |> 
  summarise(agg = sum(household_size), .by = c(x,y))
## # A tibble: 4 × 3
##   x     y          agg
##   <chr> <chr>    <int>
## 1 North Dakota      70
## 2 South Dakota     169
## 3 South Carolina    72
## 4 North Carolina    23
ggplot(df) + 
  aes(x = x, y = y, fill = household_size) + 
  geom_point(shape = 21, size = 15, position = "jitter") + 
  labs(title = "'depth' is shown via jitter")

layer_data() |> nrow()
## [1] 100
# superficial fill, overplotting
ggplot(df) + 
  aes(x = x, y = y, fill = household_size) + 
  geom_point(shape = 21, size = 15)

layer_data() |> nrow()
## [1] 100
# deep fill, collapsing by group
ggplot(df) + 
  aes(x = x, y = y, fill = household_size) + 
  geom_point(stat = StatFillCollapse, shape = 21, size = 15)

layer_data()
##      fill summary x y PANEL group shape colour size alpha stroke
## 1 #132B43      23 1 1     1     1    21  black   15    NA    0.5
## 2 #275378      70 1 2     1     2    21  black   15    NA    0.5
## 3 #28547B      72 2 1     1     3    21  black   15    NA    0.5
## 4 #56B1F7     169 2 2     1     4    21  black   15    NA    0.5
# uh-oh, log transformation not doing what might want
last_plot() + 
  scale_fill_viridis_c(transform = "log10")

layer_data()
##      fill   summary x y PANEL group shape colour size alpha stroke
## 1 #440154  3.107210 1 1     1     1    21  black   15    NA    0.5
## 2 #37678C  9.417096 1 2     1     2    21  black   15    NA    0.5
## 3 #36688C  9.512071 2 1     1     3    21  black   15    NA    0.5
## 4 #FDE725 22.017303 2 2     1     4    21  black   15    NA    0.5
# which is 

# superficial fill with jitter
df |> 
  group_by(x = x, y =  y) |>
  summarise(z = sum(household_size)) ->
precompute
## `summarise()` has grouped output by 'x'. You can override using the `.groups`
## argument.
precompute |>
  ggplot() + 
  aes(x = x, y = y, fill = z) + 
  geom_point(shape = 21, size = 15)

last_plot() + 
  scale_fill_viridis_c(transform = "log10")

# so some variations on StatFillCollase

# no default aes
StatFillCollapse$default_aes <- aes()

# gives us discrete scale
ggplot(df) + 
  aes(x = x, y = y, fill = household_size) + 
  geom_point(stat = StatFillCollapse, shape = 21, size = 15)

last_plot() + 
  scale_fill_viridis_c(transform = "log")

StatFillCollapse$default_aes <- aes(fill = after_stat(summary))

ggplot(df) + 
  aes(x = x, y = y, fill = household_size) + labs(fill = "agg household size") +
  geom_point(stat = StatFillCollapse, shape = 21, size = 15)

last_plot() + 
  scale_fill_viridis_c(transform = "log")

Closing remarks, Other Relevant Work, Caveats