Data Cleaning

library(tidyverse)
coffee_survey <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-05-14/coffee_survey.csv')
## Rows: 4042 Columns: 57
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (44): submission_id, age, cups, where_drink, brew, brew_other, purchase,...
## dbl (13): expertise, coffee_a_bitterness, coffee_a_acidity, coffee_a_persona...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
coffee_survey$age %>% 
  unique() %>% 
  sort() ->
alphabetical_age; alphabetical_age
## [1] "<18 years old"   ">65 years old"   "18-24 years old" "25-34 years old"
## [5] "35-44 years old" "45-54 years old" "55-64 years old"
age_cats <- alphabetical_age[c(1,3:7, 2)]

Plot 1: A basic bar chart

coffee_survey |>
  filter(!is.na(age)) |> 
  ggplot() + 
  aes(y = factor(age, age_cats)) + 
  geom_bar() + 
  labs(title = "Number of respondents in each\nage category in the 'coffee survey'") +
  labs(tag = "Plot 1")

Plot 2: some thematic adjustment

last_plot() + 
  # change y axis labels
  scale_y_discrete(breaks = age_cats, 
                   labels = age_cats |> 
                     str_replace(" years old", "yrs")) + 
  # theme and labs adjustments
  theme_minimal(base_size = 18) + 
  theme(panel.grid.major.y = element_blank()) + 
  theme(panel.grid.minor.y = element_blank()) + 
  theme(plot.title.position = "plot") +
  labs(y = NULL)  + 
  labs(tag = "Plot 2")

Plot 3: direct labels - using ‘label’ geom in stat_count to control spacing

you may be tempted to get padded “text” layer using hjust = -.2, won’t give you consistent padding if you have variable length text for your labels.

last_plot() + 
  aes(label = after_stat(count)) +
  stat_count(geom = "label", 
             hjust = 0, 
             size = 5) +
  scale_x_continuous(expand = expansion(c(0, .12))) + 
  labs(tag = "Plot 3")

Plot 4: Now remove “label” layer (used for demonstration purposes), replace with one where label.size = NA

ggwipe::last_plot_wipe_last() + 
  stat_count(geom = "label", 
             hjust = 0, 
             alpha = .8,
             label.size = NA 
             ) + 
  labs(tag = "Plot 4")

Plot 5: remove bar layer, replace with colorful, slightly transparent layer

ggwipe::last_plot_wipe(index = 1) + # remove bar layer 
  geom_bar(fill = "seagreen4", alpha = .7) + 
  labs(tag = "Plot 5")

Plot 6: An experiment with a callout layer.

function preparation

bar_callout <- function(data, 
                             nudge_y = 0,
                             nudge_x = 0, 
                             quantile_x = .5,
                             quantile_y = .5, ...){
  
  callout_aes <- aes(x = quantile(c(xmin, xmax), quantile_x) + nudge_x,
                   y = quantile(c(ymin, ymax), quantile_y) + nudge_y,
                   label = label %>% str_wrap(30),
                   xend = quantile(c(xmin, xmax), quantile_x),
                   yend = quantile(c(ymin, ymax), quantile_y))
  
  
  list(
    geom_label(mapping = callout_aes, data = data, ...),
    geom_curve(mapping = callout_aes, data = data, ...)
       )
  
}

Use in plot

my_callout <- "25-34 year olds are well represented in this survey.  With almost two thousand respondents, it has more than double the number of respondents than any other age category."

last_plot() +
  bar_callout(data = layer_data(plot = last_plot(), # looking at data 
                                i = 2) %>% #that is used to draw bar layer (now second layer in plot)
                .[3,], # and specifically the third bar in series
                   quantile_x = .55, # where does curve emanate from in bar
                   quantile_y = .8,
                   nudge_y = 1.5, # How far in y should text be away
                   nudge_x = 200, 
                   label = my_callout %>% str_wrap(27), 
             vjust = .3,
             hjust = 0,
             color = "grey35", curvature = .1) + 
  labs(x = NULL) +
  labs(tag = "Plot 6")

ggwipe::last_plot_wipe_last() 

ggwipe::last_plot_wipe_last() %+%
  (coffee_survey %>% filter(!is.na(favorite))) +
  aes(y = favorite |> str_wrap(20) |> fct_infreq() |> fct_rev()) + 
  labs(y = NULL) +
  scale_y_discrete() + 
  labs(title = "Number of responses that drink type was the favorite ")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggwipe::last_plot_wipe() + 
  aes(x = age, label = NULL) + 
  scale_x_discrete() + 
  geom_tile() +
  aes(fill = prop) %+%
  (coffee_survey %>% 
  filter(!is.na(age), !is.na(favorite)) %>% 
  group_by(favorite, age) %>% 
  summarize(n = n()) %>% 
  mutate(prop = n/sum(n)))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.