Intro Thoughts

Status Quo

library(tidyverse)
compute_panel_multi_response <- function(data, 
                                         scales, 
                                         cat_levels, 
                                         sep = ";"){
  
  # data = data.frame(responses = survey$q09)
  nrespondents <- nrow(data)

  data %>%
    summarise(responses = paste0(responses, collapse = sep)) %>% 
    mutate(response = str_split(responses, sep)) %>% 
    select(-responses) %>% 
    unnest(response) %>% 
    filter(response != "NA") %>% 
    count(response) %>%
    mutate(x = factor(response, cat_levels) %>% 
             as.numeric %>% as.double()) %>%
    mutate(y = as.double(n)) %>% 
    mutate(num_respondents = nrespondents)
  
}


fruit_cats <- c("apple", "banana", "pear", "orange" )


data.frame(selected_fruit = 
             c("banana;apple;pear", 
               "apple;pear", 
               "banana;pear")) %>%
  select(responses = selected_fruit) %>% 
  compute_panel_multi_response(cat_levels = fruit_cats)
## # A tibble: 3 × 5
##   response     n     x     y num_respondents
##   <chr>    <int> <dbl> <dbl>           <int>
## 1 apple        2     1     2               3
## 2 banana       2     2     2               3
## 3 pear         3     3     3               3
StatMulticat <- ggplot2::ggproto(`_class` = "StatMulticat",
                        `_inherit` = ggplot2::Stat,  
                        compute_panel = compute_panel_multi_response,
                        default_aes = aes(label = ggplot2::after_stat(paste0(n, "/", num_respondents))))

data.frame(fruit_selections = c("banana;apple;pear", "apple;pear", "pear")) %>% 
  ggplot() +  
  aes(responses = fruit_selections) +
  geom_bar(stat = StatMulticat, cat_levels = fruit_cats)
## Error in `geom_bar()`:
## ! Problem while computing position.
## ℹ Error occurred in the 1st layer.
## Caused by error in `-data$group`:
## ! invalid argument to unary operator
data.frame(fruit_selections = c("banana;apple;pear", "apple;pear", "pear")) %>% 
  ggplot() +  
  aes(responses = fruit_selections) + 
  layer(geom = "bar", stat = StatMulticat, position = "identity",
         params = list(cat_levels = fruit_cats)) + 
  layer(geom = "label", stat = StatMulticat, position = "identity",
        params = list(cat_levels = fruit_cats, 
                      hjust = 0,
                      alpha = 0,
                      label.size = 0)) +
  aes(fill = after_scale(x)) +
  coord_flip() + 
  labs(title = "Number of respondents selecting each fruit when asked\n'Which of the following fruits do you like to eat?'",
       subtitle = "Three people were surveyed")

compute_panel_multi_response <- function(data, 
                                         scales, 
                                         cat_levels = NULL, 
                                         sep = ";",
                                         in_freq = F){
  
  
  # data = data.frame(responses = survey$q09)
  nrespondents <- nrow(data)

  data %>%
    summarise(responses = paste0(responses, collapse = sep)) %>% 
    mutate(response = str_split(responses, sep)) %>% 
    select(-responses) %>% 
    unnest(response) %>% 
    filter(response != "NA") %>% 
    count(response) ->
  data
  
  if(is.null(cat_levels)){cat_levels <- sort(unique(data$response))}
  
  if(in_freq){cat_levels <- reorder(data$response, data$n)}
  
  data %>% 
    mutate(x = factor(response, cat_levels) %>% 
             as.numeric %>% as.double()) ->
  data
  
  data %>% 
    mutate(y = as.double(n)) %>% 
    mutate(num_respondents = nrespondents)
  
}



summer_movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-07-30/summer_movies.csv')
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): tconst, title_type, primary_title, original_title, genres, simple_t...
## dbl (4): year, runtime_minutes, average_rating, num_votes
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summer_movies %>% 
  rename(responses = genres) %>% 
  compute_panel_multi_response(sep = ",") %>% 
  pull(response) ->
movie_genres


summer_movies %>% 
  ggplot() +  
  aes(responses = genres)  + 
  geom_bar(stat = StatMulticat, sep = ",", in_freq = F)
## Warning in geom_bar(stat = StatMulticat, sep = ",", in_freq = F): Ignoring
## unknown parameters: `in_freq`
## Warning: Computation failed in `stat_multicat()`.
## Caused by error in `mutate()`:
## ℹ In argument: `x = factor(response, cat_levels) %>% as.numeric %>%
##   as.double()`.
## Caused by error:
## ! argument "cat_levels" is missing, with no default

summer_movies %>% 
  ggplot() +  
  aes(responses = genres)  + 
  layer(geom = "bar", stat = StatMulticat, position = "identity",
         params = list(sep = ",", in_freq = F)) +
  layer(geom = "label", stat = StatMulticat, position = "identity",
        params = list(sep = ",",
                      hjust = 0,
                      alpha = 0,
                      label.size = 0,
                      in_freq = F)) + 
  coord_flip()  +
  scale_x_continuous(breaks = 1:length(movie_genres), 
                     labels = movie_genres, 
                     limits = c(1-.5, length(movie_genres)+.5)
                    ) 
## Warning: Ignoring unknown parameters: `in_freq`
## Warning: Ignoring unknown parameters: `in_freq`
## Warning: Computation failed in `stat_multicat()`.
## Computation failed in `stat_multicat()`.
## Caused by error in `mutate()`:
## ℹ In argument: `x = factor(response, cat_levels) %>% as.numeric %>%
##   as.double()`.
## Caused by error:
## ! argument "cat_levels" is missing, with no default

Experiment

library(ggplot2)
compute_group_square <- function(data,scales){
  
  data.frame(x = 1, y = 1) |> 
    dplyr::mutate(xmin = x - .5, 
           xmax = x + .5,
           ymin = y - .5,
           ymax = y + .5)
  
}



StatSquare <- ggproto("StatSquare", 
                      Stat,
                      compute_group = compute_group_square)

ggplot(cars) + 
  geom_point(stat = StatSquare)

ggplot(cars) + 
  geom_rect(stat = StatSquare)

ggplot(cars) + 
  geom_bar(stat = StatSquare)

ggplot(cars) + 
  geom_col(stat = StatSquare)
## Warning in geom_col(stat = StatSquare): Ignoring unknown parameters: `stat`
## Error in `geom_col()`:
## ! Problem while setting up geom.
## ℹ Error occurred in the 1st layer.
## Caused by error in `compute_geom_1()`:
## ! `geom_col()` requires the following missing aesthetics: x and y.
ggplot(cars) + 
  geom_tile(stat = StatSquare, width = 5)

ggplot(cars) + 
  layer(geom = GeomCol, 
        stat = StatSquare,
        position = "identity")

ggplot(cars) + 
  geom_text(stat = StatSquare, label = "hello")

ggplot(cars) + 
  geom_label(stat = StatSquare, label = "hello")

library(tidyverse)
compute_layer_prop_by <- function(data, ...){
  if(is.null(data$value)){data$value <- 1}
  if(is.null(data$prop_by)){data$prop_by <- 1}
  if(is.null(data$wt)){data$wt <- 1}
  data$value <- data$value * data$wt
  data %>%
    group_by(prop_by) %>%
    summarize(group_sum = sum(.data$value)) ->
  group_sum_df
  data %>%
    left_join(group_sum_df, by = "prop_by") %>%
    group_by(PANEL, prop_by) %>%
    summarize(sum = sum(.data$value),
              group_sum = mean(.data$group_sum)) %>%
    mutate(prop = sum/group_sum) %>%
    mutate(prop_label = round(prop, 3)) %>%
    mutate(percent_label = paste0(prop_label * 100, "%")) %>%
    mutate(count_percent_label = paste0(sum, "\n(", percent_label, ")" )) %>%
    mutate(percent_count_label = paste0(percent_label, "\n(", sum, ")" )) %>%
    mutate(x = 0, y = 0)
}


StatTable <- ggproto(`_class` = "StatTable",
                    `_inherit` = Stat,
                    compute_layer = compute_layer_prop_by,
                    default_aes = aes(label = after_stat(sum)))

stat_table <- function(mapping = NULL, 
                       data = NULL,
                       geom = ggplot2::GeomText,
                       position = "identity", 
                       na.rm = FALSE,
                       show.legend = NA,
                       inherit.aes = TRUE, ...) {
  ggplot2::layer(
    stat = StatTable,        # proto object from step 2
    geom = geom,   # inherit other behavior
    data = data, 
    mapping = mapping,
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes,
    params = list(na.rm = na.rm, ...)
  )
}


summer_movies %>%
  mutate(genre = str_split(genres, ",")) %>% 
  unnest(genre) %>% 
  mutate(decade = floor(year/10) * 10) %>%
  mutate(n = n(), .by = genre) %>% 
  filter(n >=30 ) %>% 
  filter(!is.na(runtime_minutes)) %>%
  mutate(genre = fct_infreq(genre)) %>%  
  ggplot() +
  stat_table(geom = "tile") +
  stat_table(color = "whitesmoke") + 
  scale_x_continuous(expand = expansion()) + 
  scale_y_continuous(expand = expansion()) + 
  ggstamp::theme_void_fill("whitesmoke") ->
all; all
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

all +
  facet_grid(rows = vars(genre)) + 
  aes(fill = after_stat(sum)) + 
  scale_fill_viridis_c(end = .8)  +
  theme(strip.text.y =  
          element_text(angle = 0, hjust = 0), 
        ) +
   theme(panel.spacing =  
          unit(0, "lines"))   
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

last_plot() +
  facet_grid(rows = vars(genre),
             cols = vars(decade))
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

last_plot() +
  aes(label = after_stat(percent_label)) + 
  aes(fill = after_stat(prop))
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

last_plot() +
  aes(prop_by = decade)
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

last_plot() +
  aes(prop_by = genre)
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.

Closing remarks, Other Relevant Work, Caveats