class: center, middle, inverse, title-slide # {ggplot2} and extensions geoms ### Gina Reynolds --- ```r library(tidyverse) library(gapminder) ``` --- class: inverse, center, middle name: dd ## Discrete v. Discrete --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r *gapminder ``` ] .right-output-discrete2x_build-auto[ ``` # A tibble: 1,704 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 10 Afghanistan Asia 1997 41.8 22227415 635. # … with 1,694 more rows ``` ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% * filter(year == 2002) ``` ] .right-output-discrete2x_build-auto[ ``` # A tibble: 142 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 2002 42.1 25268405 727. 2 Albania Europe 2002 75.7 3508512 4604. 3 Algeria Africa 2002 71.0 31287142 5288. 4 Angola Africa 2002 41.0 10866106 2773. 5 Argentina Americas 2002 74.3 38331121 8798. 6 Australia Oceania 2002 80.4 19546792 30688. 7 Austria Europe 2002 79.0 8148312 32418. 8 Bahrain Asia 2002 74.8 656397 23404. 9 Bangladesh Asia 2002 62.0 135656790 1136. 10 Belgium Europe 2002 78.3 10311970 30486. # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% * mutate(seventy_plus = lifeExp >= 75) ``` ] .right-output-discrete2x_build-auto[ ``` # A tibble: 142 x 7 country continent year lifeExp pop gdpPercap seventy_plus <fct> <fct> <int> <dbl> <int> <dbl> <lgl> 1 Afghanistan Asia 2002 42.1 25268405 727. FALSE 2 Albania Europe 2002 75.7 3508512 4604. TRUE 3 Algeria Africa 2002 71.0 31287142 5288. FALSE 4 Angola Africa 2002 41.0 10866106 2773. FALSE 5 Argentina Americas 2002 74.3 38331121 8798. FALSE 6 Australia Oceania 2002 80.4 19546792 30688. TRUE 7 Austria Europe 2002 79.0 8148312 32418. TRUE 8 Bahrain Asia 2002 74.8 656397 23404. FALSE 9 Bangladesh Asia 2002 62.0 135656790 1136. FALSE 10 Belgium Europe 2002 78.3 10311970 30486. TRUE # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% * mutate(age_category = * case_when(seventy_plus == T ~ * "Seventy +", * seventy_plus == F ~ * "< Seventy")) ``` ] .right-output-discrete2x_build-auto[ ``` # A tibble: 142 x 8 country continent year lifeExp pop gdpPercap seventy_plus age_category <fct> <fct> <int> <dbl> <int> <dbl> <lgl> <chr> 1 Afghanis… Asia 2002 42.1 2.53e7 727. FALSE < Seventy 2 Albania Europe 2002 75.7 3.51e6 4604. TRUE Seventy + 3 Algeria Africa 2002 71.0 3.13e7 5288. FALSE < Seventy 4 Angola Africa 2002 41.0 1.09e7 2773. FALSE < Seventy 5 Argentina Americas 2002 74.3 3.83e7 8798. FALSE < Seventy 6 Australia Oceania 2002 80.4 1.95e7 30688. TRUE Seventy + 7 Austria Europe 2002 79.0 8.15e6 32418. TRUE Seventy + 8 Bahrain Asia 2002 74.8 6.56e5 23404. FALSE < Seventy 9 Banglade… Asia 2002 62.0 1.36e8 1136. FALSE < Seventy 10 Belgium Europe 2002 78.3 1.03e7 30486. TRUE Seventy + # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% * ggplot() ``` ] .right-output-discrete2x_build-auto[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_build_auto_5_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + * aes(x = continent) ``` ] .right-output-discrete2x_build-auto[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_build_auto_6_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + * aes(y = age_category) ``` ] .right-output-discrete2x_build-auto[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_build_auto_7_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x_build-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + * geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) ``` ] .right-output-discrete2x_build-auto[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_build_auto_8_output-1.png" width="100%" /> ] <style> .left-code-discrete2x_build-auto { color: #777; width: 38%; height: 92%; float: left; font-size: 80% } .right-output-discrete2x_build-auto { width: 60%; float: right; padding-left: 1%; } </style> --- class: split-40 count: false .left-code-discrete2x-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) + geom_count(col = "magenta", alpha = .6) + theme_minimal() ``` ] .right-output-discrete2x-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_rotate_1_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) + * geom_text(data = . %>% group_by(age_category, continent) %>% count(), aes(label = n)) + theme_minimal() ``` ] .right-output-discrete2x-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_rotate_2_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) + * geom_label(data = . %>% group_by(age_category, continent) %>% count(), aes(label = n)) + theme_minimal() ``` ] .right-output-discrete2x-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_rotate_3_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) + * geom_point(position = ggforce::position_jitternormal()) + theme_minimal() ``` ] .right-output-discrete2x-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_rotate_4_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventy_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventy_plus == T ~ "Seventy +", seventy_plus == F ~ "< Seventy")) %>% ggplot() + aes(x = continent) + aes(y = age_category) + geom_jitter(col = "blue", alpha = .3, width = .35, height = .35) + * geom_point(position = ggforce::position_jitternormal(sd_x = 0.15, sd_y = 0.01)) + theme_minimal() ``` ] .right-output-discrete2x-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x_rotate_5_output-1.png" width="100%" /> ] <style> .left-code-discrete2x-rotate { color: #777; width: 69%; height: 92%; float: left; font-size: 80% } .right-output-discrete2x-rotate { width: 30%; float: right; padding-left: 1%; } </style> --- class: split-40 count: false .left-code-discrete2x2-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventyfive_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventyfive_plus == T ~ "75+", seventyfive_plus == F ~ "<75")) %>% ggplot() + aes(x = age_category) + aes(fill = continent) + geom_point(stat = "count") + geom_bar(position = "dodge", alpha = .6) + theme_minimal() ``` ] .right-output-discrete2x2-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x2_rotate_1_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-discrete2x2-rotate[ ```r gapminder %>% filter(year == 2002) %>% mutate(seventyfive_plus = lifeExp >= 75) %>% mutate(age_category = case_when(seventyfive_plus == T ~ "75+", seventyfive_plus == F ~ "<75")) %>% ggplot() + aes(x = age_category) + aes(fill = continent) + geom_point(stat = "count") + * geom_bar(alpha = .6) + theme_minimal() ``` ] .right-output-discrete2x2-rotate[ <img src="geoms_discrete_discrete_files/figure-html/discrete2x2_rotate_2_output-1.png" width="100%" /> ] <style> .left-code-discrete2x2-rotate { color: #777; width: 38%; height: 92%; float: left; font-size: 80% } .right-output-discrete2x2-rotate { width: 60%; float: right; padding-left: 1%; } </style> --- # Mosaic --- class: split-40 count: false .left-code-mosaic-auto[ ```r *gapminder ``` ] .right-output-mosaic-auto[ ``` # A tibble: 1,704 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 10 Afghanistan Asia 1997 41.8 22227415 635. # … with 1,694 more rows ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% * filter(year == 2002) ``` ] .right-output-mosaic-auto[ ``` # A tibble: 142 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 2002 42.1 25268405 727. 2 Albania Europe 2002 75.7 3508512 4604. 3 Algeria Africa 2002 71.0 31287142 5288. 4 Angola Africa 2002 41.0 10866106 2773. 5 Argentina Americas 2002 74.3 38331121 8798. 6 Australia Oceania 2002 80.4 19546792 30688. 7 Austria Europe 2002 79.0 8148312 32418. 8 Bahrain Asia 2002 74.8 656397 23404. 9 Bangladesh Asia 2002 62.0 135656790 1136. 10 Belgium Europe 2002 78.3 10311970 30486. # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% * mutate(higher_income = gdpPercap > 20000) ``` ] .right-output-mosaic-auto[ ``` # A tibble: 142 x 7 country continent year lifeExp pop gdpPercap higher_income <fct> <fct> <int> <dbl> <int> <dbl> <lgl> 1 Afghanistan Asia 2002 42.1 25268405 727. FALSE 2 Albania Europe 2002 75.7 3508512 4604. FALSE 3 Algeria Africa 2002 71.0 31287142 5288. FALSE 4 Angola Africa 2002 41.0 10866106 2773. FALSE 5 Argentina Americas 2002 74.3 38331121 8798. FALSE 6 Australia Oceania 2002 80.4 19546792 30688. TRUE 7 Austria Europe 2002 79.0 8148312 32418. TRUE 8 Bahrain Asia 2002 74.8 656397 23404. TRUE 9 Bangladesh Asia 2002 62.0 135656790 1136. FALSE 10 Belgium Europe 2002 78.3 10311970 30486. TRUE # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% * group_by(continent, higher_income) ``` ] .right-output-mosaic-auto[ ``` # A tibble: 142 x 7 # Groups: continent, higher_income [8] country continent year lifeExp pop gdpPercap higher_income <fct> <fct> <int> <dbl> <int> <dbl> <lgl> 1 Afghanistan Asia 2002 42.1 25268405 727. FALSE 2 Albania Europe 2002 75.7 3508512 4604. FALSE 3 Algeria Africa 2002 71.0 31287142 5288. FALSE 4 Angola Africa 2002 41.0 10866106 2773. FALSE 5 Argentina Americas 2002 74.3 38331121 8798. FALSE 6 Australia Oceania 2002 80.4 19546792 30688. TRUE 7 Austria Europe 2002 79.0 8148312 32418. TRUE 8 Bahrain Asia 2002 74.8 656397 23404. TRUE 9 Bangladesh Asia 2002 62.0 135656790 1136. FALSE 10 Belgium Europe 2002 78.3 10311970 30486. TRUE # … with 132 more rows ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% * summarise(count = n()) ``` ] .right-output-mosaic-auto[ ``` # A tibble: 8 x 3 # Groups: continent [5] continent higher_income count <fct> <lgl> <int> 1 Africa FALSE 52 2 Americas FALSE 23 3 Americas TRUE 2 4 Asia FALSE 26 5 Asia TRUE 7 6 Europe FALSE 13 7 Europe TRUE 17 8 Oceania TRUE 2 ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% * mutate(num_in_continents = sum(count), * prop = count/sum(count)) ``` ] .right-output-mosaic-auto[ ``` # A tibble: 8 x 5 # Groups: continent [5] continent higher_income count num_in_continents prop <fct> <lgl> <int> <int> <dbl> 1 Africa FALSE 52 52 1 2 Americas FALSE 23 25 0.92 3 Americas TRUE 2 25 0.08 4 Asia FALSE 26 33 0.788 5 Asia TRUE 7 33 0.212 6 Europe FALSE 13 30 0.433 7 Europe TRUE 17 30 0.567 8 Oceania TRUE 2 2 1 ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% * ungroup() ``` ] .right-output-mosaic-auto[ ``` # A tibble: 8 x 5 continent higher_income count num_in_continents prop <fct> <lgl> <int> <int> <dbl> 1 Africa FALSE 52 52 1 2 Americas FALSE 23 25 0.92 3 Americas TRUE 2 25 0.08 4 Asia FALSE 26 33 0.788 5 Asia TRUE 7 33 0.212 6 Europe FALSE 13 30 0.433 7 Europe TRUE 17 30 0.567 8 Oceania TRUE 2 2 1 ``` ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% * ggplot() ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_8_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + * aes(x = continent, y = prop) ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_9_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + * geom_bar(stat = "identity", * position = "fill", * color = "lightgrey") ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_10_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + geom_bar(stat = "identity", position = "fill", color = "lightgrey") + * facet_grid(~continent, scales = "free_x", space = "free_x") ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_11_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + geom_bar(stat = "identity", position = "fill", color = "lightgrey") + facet_grid(~continent, scales = "free_x", space = "free_x") + * aes(width = num_in_continents) ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_12_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + geom_bar(stat = "identity", position = "fill", color = "lightgrey") + facet_grid(~continent, scales = "free_x", space = "free_x") + aes(width = num_in_continents) + * aes(fill = higher_income) ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_13_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + geom_bar(stat = "identity", position = "fill", color = "lightgrey") + facet_grid(~continent, scales = "free_x", space = "free_x") + aes(width = num_in_continents) + aes(fill = higher_income) + * theme(panel.spacing.x = unit(0, "npc")) ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_14_output-1.png" width="100%" /> ] --- class: split-40 count: false .left-code-mosaic-auto[ ```r gapminder %>% filter(year == 2002) %>% mutate(higher_income = gdpPercap > 20000) %>% group_by(continent, higher_income) %>% summarise(count = n()) %>% mutate(num_in_continents = sum(count), prop = count/sum(count)) %>% ungroup() %>% ggplot() + aes(x = continent, y = prop) + geom_bar(stat = "identity", position = "fill", color = "lightgrey") + facet_grid(~continent, scales = "free_x", space = "free_x") + aes(width = num_in_continents) + aes(fill = higher_income) + theme(panel.spacing.x = unit(0, "npc")) + * theme(strip.text = element_blank()) ``` ] .right-output-mosaic-auto[ <img src="geoms_discrete_discrete_files/figure-html/mosaic_auto_15_output-1.png" width="100%" /> ] <style> .left-code-mosaic-auto { color: #777; width: 38%; height: 92%; float: left; font-size: 80% } .right-output-mosaic-auto { width: 60%; float: right; padding-left: 1%; } </style> <style type="text/css"> .remark-code{line-height: 1.5; font-size: 55%} </style>