class: center, middle, inverse, title-slide # Ex. 1.1: Data Exploration ## Tailoring Your Data with tidyverse --- count: false .panel1-the_chunk-auto[ ```r *read_csv("wage_data.csv") ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 180,084 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Some Col… F 53: Never… 51 0 Married White 5 3 Less tha… F 39: Retai… 20 8000 Never Married White 5 4 Less tha… M 8: Comput… 16 4000 Never Married White 5 5 Less tha… F 53: Never… 80 0 Widowed White 5 6 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 7 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 8 Bachelors M 31: Anima… 62 25480 Never Married White 1 9 Less tha… F 53: Never… 70 0 Widowed White 1 10 Bachelors F 41: Farmi… 53 6000 Married White 6 # … with 180,074 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> * wage_data ``` ] .panel2-the_chunk-auto[ ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data *wage_data ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 180,084 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Some Col… F 53: Never… 51 0 Married White 5 3 Less tha… F 39: Retai… 20 8000 Never Married White 5 4 Less tha… M 8: Comput… 16 4000 Never Married White 5 5 Less tha… F 53: Never… 80 0 Widowed White 5 6 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 7 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 8 Bachelors M 31: Anima… 62 25480 Never Married White 1 9 Less tha… F 53: Never… 70 0 Widowed White 1 10 Bachelors F 41: Farmi… 53 6000 Married White 6 # … with 180,074 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false 
.panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data wage_data %>% ## filter out anyone who has never ## worked and anyone who reported no earnings * filter(Occupation != "53: Never Worked" & * Earnings > 0) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 84,631 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Less tha… F 39: Retai… 20 8000 Never Married White 5 3 Less tha… M 8: Comput… 16 4000 Never Married White 5 4 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 5 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 6 Bachelors M 31: Anima… 62 25480 Never Married White 1 7 Bachelors F 41: Farmi… 53 6000 Married White 6 8 Bachelors M 8: Comput… 52 70200 Married Asian 6 9 Less tha… F 41: Farmi… 16 10520 Never Married Asian 6 10 Some Col… F 30: Publi… 31 46000 Married White 4 # … with 84,621 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data wage_data %>% ## filter out anyone who has never ## worked and anyone who reported no earnings filter(Occupation != "53: Never Worked" & Earnings > 0) %>% ## create a new variable showing earnings ## per family member * mutate(Earnings_per_member = Earnings / FamilySize) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 84,631 x 11 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Less tha… F 39: Retai… 20 8000 Never Married White 5 3 Less tha… M 8: Comput… 16 4000 Never Married White 5 4 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 5 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 6 Bachelors M 31: Anima… 62 25480 Never Married White 1 7 Bachelors F 41: Farmi… 53 6000 Married White 6 8 Bachelors M 8: Comput… 52 70200 Married Asian 6 9 Less tha… F 
41: Farmi… 16 10520 Never Married Asian 6 10 Some Col… F 30: Publi… 31 46000 Married White 4 # … with 84,621 more rows, and 3 more variables: FamilyMakeup <chr>, # Age_squared <dbl>, Earnings_per_member <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data wage_data %>% ## filter out anyone who has never ## worked and anyone who reported no earnings filter(Occupation != "53: Never Worked" & Earnings > 0) %>% ## create a new variable showing earnings ## per family member mutate(Earnings_per_member = Earnings / FamilySize) %>% ## keep specific columns * select(Earnings, Earnings_per_member, Sex, Age, Education) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 84,631 x 5 Earnings Earnings_per_member Sex Age Education <dbl> <dbl> <chr> <dbl> <chr> 1 220000 44000 M 49 Bachelors 2 8000 1600 F 20 Less than HS 3 4000 800 M 16 Less than HS 4 17350 8675 M 27 Less than HS 5 12000 6000 M 24 Less than HS 6 25480 25480 M 62 Bachelors 7 6000 1000 F 53 Bachelors 8 70200 11700 M 52 Bachelors 9 10520 1753. F 16 Less than HS 10 46000 11500 F 31 Some College/Associates # … with 84,621 more rows ``` ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data wage_data %>% ## filter out anyone who has never ## worked and anyone who reported no earnings filter(Occupation != "53: Never Worked" & Earnings > 0) %>% ## create a new variable showing earnings ## per family member mutate(Earnings_per_member = Earnings / FamilySize) %>% ## keep specific columns select(Earnings, Earnings_per_member, Sex, Age, Education) %>% * filter(Earnings< 150000) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 80,635 x 5 Earnings Earnings_per_member Sex Age Education <dbl> <dbl> <chr> <dbl> <chr> 1 8000 1600 F 20 Less than HS 2 4000 800 M 16 Less than HS 3 17350 8675 M 27 Less than HS 4 12000 6000 M 24 Less than HS 5 25480 25480 M 62 Bachelors 6 6000 1000 F 53 Bachelors 7 70200 11700 M 52 Bachelors 8 10520 1753. 
F 16 Less than HS 9 46000 11500 F 31 Some College/Associates 10 40000 40000 M 37 Less than HS # … with 80,625 more rows ``` ] --- count: false .panel1-the_chunk-auto[ ```r read_csv("wage_data.csv") -> wage_data wage_data %>% ## filter out anyone who has never ## worked and anyone who reported no earnings filter(Occupation != "53: Never Worked" & Earnings > 0) %>% ## create a new variable showing earnings ## per family member mutate(Earnings_per_member = Earnings / FamilySize) %>% ## keep specific columns select(Earnings, Earnings_per_member, Sex, Age, Education) %>% filter(Earnings< 150000) -> *employed_under_150K ``` ] .panel2-the_chunk-auto[ ] <style> .panel1-the_chunk-auto { color: black; width: 49%; height: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-the_chunk-auto { color: black; width: 49%; height: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-the_chunk-auto { color: black; width: NA%; height: 33%; float: left; padding-left: 1%; font-size: 80% } </style> <style type="text/css"> .remark-code{line-height: 1.5; font-size: 90%} @media print { .has-continuation { display: block; } } code.r.hljs.remark-code{ position: relative; overflow-x: hidden; } code.r.hljs.remark-code:hover{ overflow-x:visible; width: 500px; border-style: solid; } </style>