class: center, middle, inverse, title-slide # mini wrangle ## made with flipbookr and xaringan ### Gina Reynolds, January 2020 --- ```r library(gapminder) library(tidyverse) ``` ``` ## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ── ``` ``` ## ✓ ggplot2 3.3.0.9000 ✓ purrr 0.3.3 ## ✓ tibble 2.1.3 ✓ dplyr 0.8.4 ## ✓ tidyr 1.0.2 ✓ stringr 1.4.0 ## ✓ readr 1.3.1 ✓ forcats 0.4.0 ``` ``` ## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ── ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag() ``` ```r knitr::opts_chunk$set(cache = F, comment = "") ``` --- class: split-40 count: false .column[.content[ ```r *gapminder ``` ]] .column[.content[ ``` # A tibble: 1,704 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 10 Afghanistan Asia 1997 41.8 22227415 635. 
# … with 1,694 more rows ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% * distinct(country, continent) ``` ]] .column[.content[ ``` # A tibble: 142 x 2 country continent <fct> <fct> 1 Afghanistan Asia 2 Albania Europe 3 Algeria Africa 4 Angola Africa 5 Argentina Americas 6 Australia Oceania 7 Austria Europe 8 Bahrain Asia 9 Bangladesh Asia 10 Belgium Europe # … with 132 more rows ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% * # count() = group_by() + tally() * count(continent) ``` ]] .column[.content[ ``` # A tibble: 5 x 2 continent n <fct> <int> 1 Africa 52 2 Americas 25 3 Asia 33 4 Europe 30 5 Oceania 2 ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% * rename(count = n) ``` ]] .column[.content[ ``` # A tibble: 5 x 2 continent count <fct> <int> 1 Africa 52 2 Americas 25 3 Asia 33 4 Europe 30 5 Oceania 2 ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> *count_in_continents ``` ]] .column[.content[ ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents * # another pipeline *gapminder ``` ]] .column[.content[ ``` # A tibble: 1,704 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 
10 Afghanistan Asia 1997 41.8 22227415 635. # … with 1,694 more rows ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% * summarize(mean_life_exp = mean(lifeExp), * median_life_exp = median(lifeExp)) ``` ]] .column[.content[ ``` # A tibble: 1 x 2 mean_life_exp median_life_exp <dbl> <dbl> 1 59.5 60.7 ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> *overall_summaries ``` ]] .column[.content[ ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> overall_summaries * # another *gapminder ``` ]] .column[.content[ ``` # A tibble: 1,704 x 6 country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 10 Afghanistan Asia 1997 41.8 22227415 635. 
# … with 1,694 more rows ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> overall_summaries # another gapminder %>% * group_by(continent) ``` ]] .column[.content[ ``` # A tibble: 1,704 x 6 # Groups: continent [5] country continent year lifeExp pop gdpPercap <fct> <fct> <int> <dbl> <int> <dbl> 1 Afghanistan Asia 1952 28.8 8425333 779. 2 Afghanistan Asia 1957 30.3 9240934 821. 3 Afghanistan Asia 1962 32.0 10267083 853. 4 Afghanistan Asia 1967 34.0 11537966 836. 5 Afghanistan Asia 1972 36.1 13079460 740. 6 Afghanistan Asia 1977 38.4 14880372 786. 7 Afghanistan Asia 1982 39.9 12881816 978. 8 Afghanistan Asia 1987 40.8 13867957 852. 9 Afghanistan Asia 1992 41.7 16317921 649. 10 Afghanistan Asia 1997 41.8 22227415 635. # … with 1,694 more rows ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> overall_summaries # another gapminder %>% group_by(continent) %>% * summarize(mean_life_exp = mean(lifeExp), * median_life_exp = median(lifeExp)) ``` ]] .column[.content[ ``` # A tibble: 5 x 3 continent mean_life_exp median_life_exp <fct> <dbl> <dbl> 1 Africa 48.9 47.8 2 Americas 64.7 67.0 3 Asia 60.1 61.8 4 Europe 71.9 72.2 5 Oceania 74.3 73.7 ``` ]] --- class: split-40 count: false .column[.content[ ```r gapminder %>% distinct(country, continent) %>% # count() = group_by() + tally() count(continent) %>% rename(count = n) -> count_in_continents # another pipeline gapminder %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> 
overall_summaries # another gapminder %>% group_by(continent) %>% summarize(mean_life_exp = mean(lifeExp), median_life_exp = median(lifeExp)) -> *summaries_by_continent ``` ]] .column[.content[ ]] <style type="text/css"> .remark-code{line-height: 1.5; font-size: 70%} </style>