library(tidyverse)
coffee_survey <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-05-14/coffee_survey.csv')
## Rows: 4042 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (44): submission_id, age, cups, where_drink, brew, brew_other, purchase,...
## dbl (13): expertise, coffee_a_bitterness, coffee_a_acidity, coffee_a_persona...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
coffee_survey$age %>%
unique() %>%
sort() ->
alphabetical_age; alphabetical_age
## [1] "<18 years old" ">65 years old" "18-24 years old" "25-34 years old"
## [5] "35-44 years old" "45-54 years old" "55-64 years old"
age_cats <- alphabetical_age[c(1,3:7, 2)]
coffee_survey |>
filter(!is.na(age)) |>
ggplot() +
aes(y = factor(age, age_cats)) +
geom_bar() +
labs(title = "Number of respondents in each\nage category in the 'coffee survey'") +
labs(tag = "Plot 1")
last_plot() +
# change y axis labels
scale_y_discrete(breaks = age_cats,
labels = age_cats |>
str_replace(" years old", "yrs")) +
# theme and labs adjustments
theme_minimal(base_size = 18) +
theme(panel.grid.major.y = element_blank()) +
theme(panel.grid.minor.y = element_blank()) +
theme(plot.title.position = "plot") +
labs(y = NULL) +
labs(tag = "Plot 2")
you may be tempted to get padded “text” layer using hjust = -.2, won’t give you consistent padding if you have variable length text for your labels.
last_plot() +
aes(label = after_stat(count)) +
stat_count(geom = "label",
hjust = 0,
size = 5) +
scale_x_continuous(expand = expansion(c(0, .12))) +
labs(tag = "Plot 3")
ggwipe::last_plot_wipe_last() +
stat_count(geom = "label",
hjust = 0,
alpha = .8,
label.size = NA
) +
labs(tag = "Plot 4")
ggwipe::last_plot_wipe(index = 1) + # remove bar layer
geom_bar(fill = "seagreen4", alpha = .7) +
labs(tag = "Plot 5")
bar_callout <- function(data,
nudge_y = 0,
nudge_x = 0,
quantile_x = .5,
quantile_y = .5, ...){
callout_aes <- aes(x = quantile(c(xmin, xmax), quantile_x) + nudge_x,
y = quantile(c(ymin, ymax), quantile_y) + nudge_y,
label = label %>% str_wrap(30),
xend = quantile(c(xmin, xmax), quantile_x),
yend = quantile(c(ymin, ymax), quantile_y))
list(
geom_label(mapping = callout_aes, data = data, ...),
geom_curve(mapping = callout_aes, data = data, ...)
)
}
my_callout <- "25-34 year olds are well represented in this survey. With almost two thousand respondents, it has more than double the number of respondents than any other age category."
last_plot() +
bar_callout(data = layer_data(plot = last_plot(), # looking at data
i = 2) %>% #that is used to draw bar layer (now second layer in plot)
.[3,], # and specifically the third bar in series
quantile_x = .55, # where does curve emanate from in bar
quantile_y = .8,
nudge_y = 1.5, # How far in y should text be away
nudge_x = 200,
label = my_callout %>% str_wrap(27),
vjust = .3,
hjust = 0,
color = "grey35", curvature = .1) +
labs(x = NULL) +
labs(tag = "Plot 6")
ggwipe::last_plot_wipe_last()
ggwipe::last_plot_wipe_last() %+%
(coffee_survey %>% filter(!is.na(favorite))) +
aes(y = favorite |> str_wrap(20) |> fct_infreq() |> fct_rev()) +
labs(y = NULL) +
scale_y_discrete() +
labs(title = "Number of responses that drink type was the favorite ")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
ggwipe::last_plot_wipe() +
aes(x = age, label = NULL) +
scale_x_discrete() +
geom_tile() +
aes(fill = prop) %+%
(coffee_survey %>%
filter(!is.na(age), !is.na(favorite)) %>%
group_by(favorite, age) %>%
summarize(n = n()) %>%
mutate(prop = n/sum(n)))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.