knitr::opts_chunk$set(message = F, warning = F)
library(tidyverse)
library(magrittr)
Titanic %>%
data.frame() %>%
uncount(weights = Freq) ->
tidy_titanic ; tidy_titanic %>% head()
## Class Sex Age Survived
## 3 3rd Male Child No
## 3.1 3rd Male Child No
## 3.2 3rd Male Child No
## 3.3 3rd Male Child No
## 3.4 3rd Male Child No
## 3.5 3rd Male Child No
Titanic %>%
data.frame() ->
flat_titanic ; flat_titanic %>% head()
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
you can make a visual pivot table in ggplot2; analyst job is to describe the form. How will it look
specify 3 things - start with visual layout
tidy_titanic %>%
ggplot() +
aes(x = Sex, y = Survived) +
geom_jitter() +
geom_count(color = "blue")
tidy_titanic %>%
group_by(Sex, Survived) %>%
summarize(value = n()) %>%
pivot_wider(names_from = Survived)
## # A tibble: 2 × 3
## # Groups: Sex [2]
## Sex No Yes
## <fct> <int> <int>
## 1 Male 1364 367
## 2 Female 126 344
pivot_count <- function(data, y, y0, y00, x = NULL,
value = NULL,
fun = sum,
pivot = T #ifelse(is.null(x),F,T)
){
y00 <- enquo(y00)
y0 <- enquo(y0)
y <- enquo(y)
x <- enquo(x)
data <- data %>% mutate(count = 1)
tidy <- data %>%
group_by(!!y00, !!y0, !!y, !!x) %>%
summarize(value = fun(count)) %>%
ungroup()
if(pivot){#or x is null
tidy %>%
pivot_wider(names_from = !!x)
}else{
tidy
}
}
pivot_calc <- function(data, y, y0, y00, x = NULL,
value,
fun = sum,
pivot = T #ifelse(is.null(x),F,T)
){
y00 <- enquo(y00)
y0 <- enquo(y0)
y <- enquo(y)
x <- enquo(x)
value <- enquo(value)
tidy <- data %>%
group_by(!!y00, !!y0, !!y, !!x) %>%
summarize(value = fun(!!value)) %>%
ungroup()
if(pivot){#or x is null
tidy %>%
pivot_wider(names_from = !!x)
}else{
tidy
}
}
tidy_titanic %>%
pivot_count(x = Sex)
## # A tibble: 1 × 2
## Male Female
## <dbl> <dbl>
## 1 1731 470
tidy_titanic %>%
pivot_count(y = Survived, x = Sex)
## # A tibble: 2 × 3
## Survived Male Female
## <fct> <dbl> <dbl>
## 1 No 1364 126
## 2 Yes 367 344
tidy_titanic %>%
pivot_count(y = Survived, x = Sex, y0 = Class)
## # A tibble: 8 × 4
## Class Survived Male Female
## <fct> <fct> <dbl> <dbl>
## 1 1st No 118 4
## 2 1st Yes 62 141
## 3 2nd No 154 13
## 4 2nd Yes 25 93
## 5 3rd No 422 106
## 6 3rd Yes 88 90
## 7 Crew No 670 3
## 8 Crew Yes 192 20
# for fun organize like it will appear visually in code
tidy_titanic %>%
pivot_count( x = Sex,
y0 = Class, y = Survived )
## # A tibble: 8 × 4
## Class Survived Male Female
## <fct> <fct> <dbl> <dbl>
## 1 1st No 118 4
## 2 1st Yes 62 141
## 3 2nd No 154 13
## 4 2nd Yes 25 93
## 5 3rd No 422 106
## 6 3rd Yes 88 90
## 7 Crew No 670 3
## 8 Crew Yes 192 20
tidy_titanic %>%
pivot_count(x = Sex, y = Survived, pivot = F)
## # A tibble: 4 × 3
## Survived Sex value
## <fct> <fct> <dbl>
## 1 No Male 1364
## 2 No Female 126
## 3 Yes Male 367
## 4 Yes Female 344
flat_titanic %>%
pivot_calc( x = Sex,
y = Survived, value = Freq, fun = sum)
## # A tibble: 2 × 3
## Survived Male Female
## <fct> <dbl> <dbl>
## 1 No 1364 126
## 2 Yes 367 344
tidy_titanic %>%
pivot_count(x = Sex, y = Survived, y0 = Class) %>%
group_by(Class) %>%
gt::gt()
Survived | Male | Female |
---|---|---|
1st | ||
No | 118 | 4 |
Yes | 62 | 141 |
2nd | ||
No | 154 | 13 |
Yes | 25 | 93 |
3rd | ||
No | 422 | 106 |
Yes | 88 | 90 |
Crew | ||
No | 670 | 3 |
Yes | 192 | 20 |
tidy_titanic %>%
pivot_count(x = Sex, y = Survived, y0 = Class, y00 = Age) %>%
group_by(Age) %>%
gt::gt()
Class | Survived | Male | Female |
---|---|---|---|
Child | |||
1st | Yes | 5 | 1 |
2nd | Yes | 11 | 13 |
3rd | No | 35 | 17 |
3rd | Yes | 13 | 14 |
Adult | |||
1st | No | 118 | 4 |
1st | Yes | 57 | 140 |
2nd | No | 154 | 13 |
2nd | Yes | 14 | 80 |
3rd | No | 387 | 89 |
3rd | Yes | 75 | 76 |
Crew | No | 670 | 3 |
Crew | Yes | 192 | 20 |
tidy_titanic %>%
group_by(Sex, Survived) %>%
summarize(value = n()) %>%
group_by(Sex) %>%
mutate(prop = value/sum(value)) %>%
select(-value) %>%
pivot_wider(values_from = prop, names_from = Sex)
## # A tibble: 2 × 3
## Survived Male Female
## <fct> <dbl> <dbl>
## 1 No 0.788 0.268
## 2 Yes 0.212 0.732
two_way_prop <- function(data, y, y0, y00, x = NULL, value = NULL, fun = sum, within, within2, pivot = !is.null(x), percent = T, round = F){
y00 <- enquo(y00)
y0 <- enquo(y0)
x <- enquo(x)
y <- enquo(y)
within <- enquo(within)
within2 <- enquo(within2)
if(is.null(value)){
data <- data %>% mutate(value = 1)
}else{
value <- enquo(value)
}
data %>%
group_by(!!y00, !!y0, !!y, !!x) %>%
summarize(value = fun(value)) %>%
group_by(!!within, !!within2) %>%
mutate(prop = (value/sum(value)*ifelse(percent, 100, 1)) %>% round(1)) %>%
select(-value) %>%
ungroup() ->
tidy
if(pivot){
tidy %>%
pivot_wider(values_from = prop, names_from = !!x)
}else{
tidy
}
}
tidy_titanic %>%
two_way_prop(y = Survived, x = Class, within = Class)
## # A tibble: 2 × 5
## Survived `1st` `2nd` `3rd` Crew
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 No 37.5 58.6 74.8 76
## 2 Yes 62.5 41.4 25.2 24
tidy_titanic %>%
two_way_prop(y0 = Survived,
y = Sex,
x = Class,
within = Survived)
## # A tibble: 4 × 6
## Survived Sex `1st` `2nd` `3rd` Crew
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 No Male 7.9 10.3 28.3 45
## 2 No Female 0.3 0.9 7.1 0.2
## 3 Yes Male 8.7 3.5 12.4 27
## 4 Yes Female 19.8 13.1 12.7 2.8
tidy_titanic %>%
two_way_prop(y0 = Survived,
y = Sex,
x = Class,
within = Survived,
within2 = Sex)
## # A tibble: 4 × 6
## Survived Sex `1st` `2nd` `3rd` Crew
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 No Male 8.7 11.3 30.9 49.1
## 2 No Female 3.2 10.3 84.1 2.4
## 3 Yes Male 16.9 6.8 24 52.3
## 4 Yes Female 41 27 26.2 5.8
tidy_titanic %>%
two_way_prop(y0 = Survived,
y = Sex,
x = Class,
within = Survived, pivot = F) %>%
ggplot() +
aes(x = Class, y = Sex) +
facet_grid(rows = vars(Survived)) +
geom_tile() +
aes(fill = prop) +
geom_text(aes(label = prop %>% round(3)))
tidy_titanic %>%
two_way_prop(y0 = Survived,
y = Sex,
x = Class,
within = Survived,
within2 = Sex, pivot = F) %>%
ggplot() +
aes(x = Class, y = 1) +
facet_grid(rows = vars(Survived, Sex)) +
geom_tile() +
aes(fill = prop) +
geom_text(aes(label = prop %>% round(3)))