knitr::opts_chunk$set(message = F, warning = F)

Step 00. prep some data, records and flat

library(tidyverse)
library(magrittr)

Titanic %>% 
  data.frame() %>% 
  uncount(weights = Freq) ->
tidy_titanic ; tidy_titanic %>% head()
##     Class  Sex   Age Survived
## 3     3rd Male Child       No
## 3.1   3rd Male Child       No
## 3.2   3rd Male Child       No
## 3.3   3rd Male Child       No
## 3.4   3rd Male Child       No
## 3.5   3rd Male Child       No
Titanic %>% 
  data.frame() ->
flat_titanic ; flat_titanic %>% head()
##   Class    Sex   Age Survived Freq
## 1   1st   Male Child       No    0
## 2   2nd   Male Child       No    0
## 3   3rd   Male Child       No   35
## 4  Crew   Male Child       No    0
## 5   1st Female Child       No    0
## 6   2nd Female Child       No    0

Step 0. Some observations

ggplot2: user needs to describe layout of table

you can make a visual pivot table in ggplot2; analyst job is to describe the form. How will it look

specify 3 things - start with visual layout

  • specify x
  • specify y
  • specify count type geom
tidy_titanic %>% 
  ggplot() + 
  aes(x = Sex, y = Survived) + 
  geom_jitter() + 
  geom_count(color = "blue")


With existing pivot tools, description isn’t so visual

  • specify vars
  • specify aggregation
  • specify visual arrangement (names from?) <- this comes last
tidy_titanic %>% 
  group_by(Sex, Survived) %>% 
  summarize(value = n()) %>% 
  pivot_wider(names_from = Survived)
## # A tibble: 2 × 3
## # Groups:   Sex [2]
##   Sex       No   Yes
##   <fct>  <int> <int>
## 1 Male    1364   367
## 2 Female   126   344

Step 1a. Make Functions to allow description of final table, pivot_count and pivot_calc

x argument is horizontal elements (columns) and y is vertical elements (rows)

pivot_count <- function(data, y, y0, y00, x = NULL, 
                          value = NULL, 
                          fun = sum, 
                          pivot = T #ifelse(is.null(x),F,T)
                          ){
  
  y00 <- enquo(y00)
  y0 <- enquo(y0)
  y <- enquo(y)
  x <- enquo(x)

    data <- data %>% mutate(count = 1)
  
tidy <- data %>% 
  group_by(!!y00, !!y0, !!y, !!x) %>% 
  summarize(value = fun(count)) %>% 
  ungroup()
  
  if(pivot){#or x is null
  tidy %>% 
    pivot_wider(names_from = !!x)
  }else{
  tidy
  }

}
pivot_calc <- function(data, y, y0, y00, x = NULL, 
                          value, 
                          fun = sum, 
                          pivot = T #ifelse(is.null(x),F,T)
                          ){
  
  y00 <- enquo(y00)
  y0 <- enquo(y0)
  y <- enquo(y)
  x <- enquo(x)
  value <- enquo(value)
  
tidy <- data %>% 
  group_by(!!y00, !!y0, !!y, !!x) %>% 
  summarize(value = fun(!!value)) %>% 
  ungroup()
  
  if(pivot){#or x is null
  tidy %>% 
    pivot_wider(names_from = !!x)
  }else{
  tidy
  }

}

Step 1b. Using those functions

tidy_titanic %>% 
   pivot_count(x = Sex)
## # A tibble: 1 × 2
##    Male Female
##   <dbl>  <dbl>
## 1  1731    470
tidy_titanic %>% 
  pivot_count(y = Survived, x = Sex) 
## # A tibble: 2 × 3
##   Survived  Male Female
##   <fct>    <dbl>  <dbl>
## 1 No        1364    126
## 2 Yes        367    344
tidy_titanic %>% 
  pivot_count(y = Survived, x = Sex, y0 = Class)
## # A tibble: 8 × 4
##   Class Survived  Male Female
##   <fct> <fct>    <dbl>  <dbl>
## 1 1st   No         118      4
## 2 1st   Yes         62    141
## 3 2nd   No         154     13
## 4 2nd   Yes         25     93
## 5 3rd   No         422    106
## 6 3rd   Yes         88     90
## 7 Crew  No         670      3
## 8 Crew  Yes        192     20
# for fun organize like it will appear visually in code
tidy_titanic %>% 
  pivot_count(                          x = Sex, 
              y0 = Class, y = Survived          )
## # A tibble: 8 × 4
##   Class Survived  Male Female
##   <fct> <fct>    <dbl>  <dbl>
## 1 1st   No         118      4
## 2 1st   Yes         62    141
## 3 2nd   No         154     13
## 4 2nd   Yes         25     93
## 5 3rd   No         422    106
## 6 3rd   Yes         88     90
## 7 Crew  No         670      3
## 8 Crew  Yes        192     20

After designing your table you might actually want to have the calculation in long form.

tidy_titanic %>% 
  pivot_count(x = Sex, y = Survived, pivot = F)
## # A tibble: 4 × 3
##   Survived Sex    value
##   <fct>    <fct>  <dbl>
## 1 No       Male    1364
## 2 No       Female   126
## 3 Yes      Male     367
## 4 Yes      Female   344

1b. pivot_calc using pivot calc function for non count aggregation

just for fun arrange the code how the table will look

flat_titanic %>%
  pivot_calc(              x = Sex, 
             y = Survived, value = Freq, fun = sum)
## # A tibble: 2 × 3
##   Survived  Male Female
##   <fct>    <dbl>  <dbl>
## 1 No        1364    126
## 2 Yes        367    344

1b style. use another tool to style

goal of functions is not to style - just to make calculation faster by using a visually driven API

tidy_titanic %>% 
  pivot_count(x = Sex, y = Survived, y0 = Class) %>% 
  group_by(Class) %>% 
  gt::gt()
Survived Male Female
1st
No 118 4
Yes 62 141
2nd
No 154 13
Yes 25 93
3rd
No 422 106
Yes 88 90
Crew
No 670 3
Yes 192 20

tidy_titanic %>% 
  pivot_count(x = Sex, y = Survived, y0 = Class, y00 = Age) %>% 
  group_by(Age) %>% 
  gt::gt()
Class Survived Male Female
Child
1st Yes 5 1
2nd Yes 11 13
3rd No 35 17
3rd Yes 13 14
Adult
1st No 118 4
1st Yes 57 140
2nd No 154 13
2nd Yes 14 80
3rd No 387 89
3rd Yes 75 76
Crew No 670 3
Crew Yes 192 20

Back to Step 0, Observations: use existing tools to calculate proportions is many step process

feels like lots of gymnastics… a vis first approach is what we are after

tidy_titanic %>% 
  group_by(Sex, Survived) %>% 
  summarize(value = n()) %>% 
  group_by(Sex) %>% 
  mutate(prop = value/sum(value)) %>% 
  select(-value) %>% 
  pivot_wider(values_from = prop, names_from = Sex)
## # A tibble: 2 × 3
##   Survived  Male Female
##   <fct>    <dbl>  <dbl>
## 1 No       0.788  0.268
## 2 Yes      0.212  0.732

Step 2a. build a function where visual arrangement leads.

two_way_prop <- function(data, y, y0, y00, x = NULL, value = NULL, fun = sum, within, within2,  pivot = !is.null(x), percent = T, round = F){
  
  y00 <- enquo(y00)
  y0 <- enquo(y0)
  x <- enquo(x)
  y <- enquo(y)
  within <- enquo(within)
  within2 <- enquo(within2)

  
  if(is.null(value)){
    data <- data %>% mutate(value = 1)
  }else{
    value <- enquo(value)
  }
  
  data %>% 
  group_by(!!y00, !!y0, !!y, !!x) %>% 
  summarize(value = fun(value)) %>% 
  group_by(!!within, !!within2) %>% 
  mutate(prop = (value/sum(value)*ifelse(percent, 100, 1)) %>% round(1)) %>% 
  select(-value) %>%
  ungroup() ->
tidy
  
  if(pivot){
    
    tidy %>% 
      pivot_wider(values_from = prop, names_from = !!x) 
    
  }else{
    
    tidy
  }
  
}

Step 2b. using the pivot_prop

tidy_titanic %>% 
  two_way_prop(y = Survived, x = Class, within = Class)
## # A tibble: 2 × 5
##   Survived `1st` `2nd` `3rd`  Crew
##   <fct>    <dbl> <dbl> <dbl> <dbl>
## 1 No        37.5  58.6  74.8    76
## 2 Yes       62.5  41.4  25.2    24
tidy_titanic %>% 
  two_way_prop(y0 = Survived, 
               y = Sex, 
               x = Class, 
               within = Survived)
## # A tibble: 4 × 6
##   Survived Sex    `1st` `2nd` `3rd`  Crew
##   <fct>    <fct>  <dbl> <dbl> <dbl> <dbl>
## 1 No       Male     7.9  10.3  28.3  45  
## 2 No       Female   0.3   0.9   7.1   0.2
## 3 Yes      Male     8.7   3.5  12.4  27  
## 4 Yes      Female  19.8  13.1  12.7   2.8
tidy_titanic %>% 
  two_way_prop(y0 = Survived, 
               y = Sex, 
               x = Class, 
               within = Survived,
               within2 = Sex)
## # A tibble: 4 × 6
##   Survived Sex    `1st` `2nd` `3rd`  Crew
##   <fct>    <fct>  <dbl> <dbl> <dbl> <dbl>
## 1 No       Male     8.7  11.3  30.9  49.1
## 2 No       Female   3.2  10.3  84.1   2.4
## 3 Yes      Male    16.9   6.8  24    52.3
## 4 Yes      Female  41    27    26.2   5.8
tidy_titanic %>% 
  two_way_prop(y0 = Survived, 
               y = Sex, 
               x = Class, 
               within = Survived, pivot = F) %>% 
  ggplot() +
  aes(x = Class, y = Sex) +
  facet_grid(rows = vars(Survived)) +
  geom_tile() +
  aes(fill = prop) + 
  geom_text(aes(label = prop %>% round(3)))

tidy_titanic %>% 
  two_way_prop(y0 = Survived, 
               y = Sex, 
               x = Class, 
               within = Survived, 
               within2 = Sex, pivot = F) %>% 
  ggplot() +
  aes(x = Class, y = 1) +
  facet_grid(rows = vars(Survived, Sex)) +
  geom_tile() +
  aes(fill = prop) + 
  geom_text(aes(label = prop %>% round(3)))

Reflections and questions

  1. Does this already exist?
  2. How can API improve? possibly rows = vars(y00, y0, y), cols = vars(x). and within = vars(?, ?) This requires more digging into tidy eval. What about multiple x vars?
  3. How can internals improve? Also tidy eval is old I think. defaults for missing data.
  4. What about summary columns, rows. Column totals, etc. Maybe not very tidy…