Intro Thoughts
Status Quo
library(tidyverse)
compute_panel_multi_response <- function(data,
scales,
cat_levels,
sep = ";"){
# data = data.frame(responses = survey$q09)
nrespondents <- nrow(data)
data %>%
summarise(responses = paste0(responses, collapse = sep)) %>%
mutate(response = str_split(responses, sep)) %>%
select(-responses) %>%
unnest(response) %>%
filter(response != "NA") %>%
count(response) %>%
mutate(x = factor(response, cat_levels) %>%
as.numeric %>% as.double()) %>%
mutate(y = as.double(n)) %>%
mutate(num_respondents = nrespondents)
}
fruit_cats <- c("apple", "banana", "pear", "orange" )
data.frame(selected_fruit =
c("banana;apple;pear",
"apple;pear",
"banana;pear")) %>%
select(responses = selected_fruit) %>%
compute_panel_multi_response(cat_levels = fruit_cats)
## # A tibble: 3 × 5
## response n x y num_respondents
## <chr> <int> <dbl> <dbl> <int>
## 1 apple 2 1 2 3
## 2 banana 2 2 2 3
## 3 pear 3 3 3 3
StatMulticat <- ggplot2::ggproto(`_class` = "StatMulticat",
`_inherit` = ggplot2::Stat,
compute_panel = compute_panel_multi_response,
default_aes = aes(label = ggplot2::after_stat(paste0(n, "/", num_respondents))))
data.frame(fruit_selections = c("banana;apple;pear", "apple;pear", "pear")) %>%
ggplot() +
aes(responses = fruit_selections) +
geom_bar(stat = StatMulticat, cat_levels = fruit_cats)
## Error in `geom_bar()`:
## ! Problem while computing position.
## ℹ Error occurred in the 1st layer.
## Caused by error in `-data$group`:
## ! invalid argument to unary operator
data.frame(fruit_selections = c("banana;apple;pear", "apple;pear", "pear")) %>%
ggplot() +
aes(responses = fruit_selections) +
layer(geom = "bar", stat = StatMulticat, position = "identity",
params = list(cat_levels = fruit_cats)) +
layer(geom = "label", stat = StatMulticat, position = "identity",
params = list(cat_levels = fruit_cats,
hjust = 0,
alpha = 0,
label.size = 0)) +
aes(fill = after_scale(x)) +
coord_flip() +
labs(title = "Number of respondents selecting each fruit when asked\n'Which of the following fruits do you like to eat?'",
subtitle = "Three people were surveyed")
compute_panel_multi_response <- function(data,
scales,
cat_levels = NULL,
sep = ";",
in_freq = F){
# data = data.frame(responses = survey$q09)
nrespondents <- nrow(data)
data %>%
summarise(responses = paste0(responses, collapse = sep)) %>%
mutate(response = str_split(responses, sep)) %>%
select(-responses) %>%
unnest(response) %>%
filter(response != "NA") %>%
count(response) ->
data
if(is.null(cat_levels)){cat_levels <- sort(unique(data$response))}
if(in_freq){cat_levels <- reorder(data$response, data$n)}
data %>%
mutate(x = factor(response, cat_levels) %>%
as.numeric %>% as.double()) ->
data
data %>%
mutate(y = as.double(n)) %>%
mutate(num_respondents = nrespondents)
}
summer_movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-07-30/summer_movies.csv')
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): tconst, title_type, primary_title, original_title, genres, simple_t...
## dbl (4): year, runtime_minutes, average_rating, num_votes
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summer_movies %>%
rename(responses = genres) %>%
compute_panel_multi_response(sep = ",") %>%
pull(response) ->
movie_genres
summer_movies %>%
ggplot() +
aes(responses = genres) +
geom_bar(stat = StatMulticat, sep = ",", in_freq = F)
## Warning in geom_bar(stat = StatMulticat, sep = ",", in_freq = F): Ignoring
## unknown parameters: `in_freq`
## Warning: Computation failed in `stat_multicat()`.
## Caused by error in `mutate()`:
## ℹ In argument: `x = factor(response, cat_levels) %>% as.numeric %>%
## as.double()`.
## Caused by error:
## ! argument "cat_levels" is missing, with no default
summer_movies %>%
ggplot() +
aes(responses = genres) +
layer(geom = "bar", stat = StatMulticat, position = "identity",
params = list(sep = ",", in_freq = F)) +
layer(geom = "label", stat = StatMulticat, position = "identity",
params = list(sep = ",",
hjust = 0,
alpha = 0,
label.size = 0,
in_freq = F)) +
coord_flip() +
scale_x_continuous(breaks = 1:length(movie_genres),
labels = movie_genres,
limits = c(1-.5, length(movie_genres)+.5)
)
## Warning: Ignoring unknown parameters: `in_freq`
## Warning: Ignoring unknown parameters: `in_freq`
## Warning: Computation failed in `stat_multicat()`.
## Computation failed in `stat_multicat()`.
## Caused by error in `mutate()`:
## ℹ In argument: `x = factor(response, cat_levels) %>% as.numeric %>%
## as.double()`.
## Caused by error:
## ! argument "cat_levels" is missing, with no default
Experiment
library(ggplot2)
compute_group_square <- function(data,scales){
data.frame(x = 1, y = 1) |>
dplyr::mutate(xmin = x - .5,
xmax = x + .5,
ymin = y - .5,
ymax = y + .5)
}
StatSquare <- ggproto("StatSquare",
Stat,
compute_group = compute_group_square)
ggplot(cars) +
geom_point(stat = StatSquare)
ggplot(cars) +
geom_rect(stat = StatSquare)
ggplot(cars) +
geom_bar(stat = StatSquare)
ggplot(cars) +
geom_col(stat = StatSquare)
## Warning in geom_col(stat = StatSquare): Ignoring unknown parameters: `stat`
## Error in `geom_col()`:
## ! Problem while setting up geom.
## ℹ Error occurred in the 1st layer.
## Caused by error in `compute_geom_1()`:
## ! `geom_col()` requires the following missing aesthetics: x and y.
ggplot(cars) +
geom_tile(stat = StatSquare, width = 5)
ggplot(cars) +
layer(geom = GeomCol,
stat = StatSquare,
position = "identity")
ggplot(cars) +
geom_text(stat = StatSquare, label = "hello")
ggplot(cars) +
geom_label(stat = StatSquare, label = "hello")
library(tidyverse)
compute_layer_prop_by <- function(data, ...){
if(is.null(data$value)){data$value <- 1}
if(is.null(data$prop_by)){data$prop_by <- 1}
if(is.null(data$wt)){data$wt <- 1}
data$value <- data$value * data$wt
data %>%
group_by(prop_by) %>%
summarize(group_sum = sum(.data$value)) ->
group_sum_df
data %>%
left_join(group_sum_df, by = "prop_by") %>%
group_by(PANEL, prop_by) %>%
summarize(sum = sum(.data$value),
group_sum = mean(.data$group_sum)) %>%
mutate(prop = sum/group_sum) %>%
mutate(prop_label = round(prop, 3)) %>%
mutate(percent_label = paste0(prop_label * 100, "%")) %>%
mutate(count_percent_label = paste0(sum, "\n(", percent_label, ")" )) %>%
mutate(percent_count_label = paste0(percent_label, "\n(", sum, ")" )) %>%
mutate(x = 0, y = 0)
}
StatTable <- ggproto(`_class` = "StatTable",
`_inherit` = Stat,
compute_layer = compute_layer_prop_by,
default_aes = aes(label = after_stat(sum)))
stat_table <- function(mapping = NULL,
data = NULL,
geom = ggplot2::GeomText,
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatTable, # proto object from step 2
geom = geom, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
summer_movies %>%
mutate(genre = str_split(genres, ",")) %>%
unnest(genre) %>%
mutate(decade = floor(year/10) * 10) %>%
mutate(n = n(), .by = genre) %>%
filter(n >=30 ) %>%
filter(!is.na(runtime_minutes)) %>%
mutate(genre = fct_infreq(genre)) %>%
ggplot() +
stat_table(geom = "tile") +
stat_table(color = "whitesmoke") +
scale_x_continuous(expand = expansion()) +
scale_y_continuous(expand = expansion()) +
ggstamp::theme_void_fill("whitesmoke") ->
all; all
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
all +
facet_grid(rows = vars(genre)) +
aes(fill = after_stat(sum)) +
scale_fill_viridis_c(end = .8) +
theme(strip.text.y =
element_text(angle = 0, hjust = 0),
) +
theme(panel.spacing =
unit(0, "lines"))
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
last_plot() +
facet_grid(rows = vars(genre),
cols = vars(decade))
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
last_plot() +
aes(label = after_stat(percent_label)) +
aes(fill = after_stat(prop))
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
last_plot() +
aes(prop_by = decade)
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
last_plot() +
aes(prop_by = genre)
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'PANEL'. You can override using the
## `.groups` argument.