class: center, middle, inverse, title-slide # Ex. 1.2: Data Exploration ## Using summarize() --- count: false .panel1-the_chunk-auto[ ```r *wage_data ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 180,084 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Some Col… F 53: Never… 51 0 Married White 5 3 Less tha… F 39: Retai… 20 8000 Never Married White 5 4 Less tha… M 8: Comput… 16 4000 Never Married White 5 5 Less tha… F 53: Never… 80 0 Widowed White 5 6 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 7 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 8 Bachelors M 31: Anima… 62 25480 Never Married White 1 9 Less tha… F 53: Never… 70 0 Widowed White 1 10 Bachelors F 41: Farmi… 53 6000 Married White 6 # … with 180,074 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% * filter(Occupation != "53: Never Worked" & * Earnings > 0) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 84,631 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Less tha… F 39: Retai… 20 8000 Never Married White 5 3 Less tha… M 8: Comput… 16 4000 Never Married White 5 4 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 5 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 6 Bachelors M 31: Anima… 62 25480 Never Married White 1 7 Bachelors F 41: Farmi… 53 6000 Married White 6 8 Bachelors M 8: Comput… 52 70200 Married Asian 6 9 Less tha… F 41: Farmi… 16 10520 Never Married Asian 6 10 Some Col… F 30: Publi… 31 46000 Married White 4 # … with 84,621 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% * 
summarise(mean(Earnings)) ``` ] .panel2-the_chunk-auto[ ``` # A tibble: 1 x 1 `mean(Earnings)` <dbl> 1 52670. ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% * as.numeric() ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> *mean_earnings ``` ] .panel2-the_chunk-auto[ ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object *mean_earnings ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings *wage_data ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ``` # A tibble: 180,084 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Some Col… F 53: Never… 51 0 Married White 5 3 Less tha… F 39: Retai… 20 8000 Never Married White 5 4 Less tha… M 8: Comput… 16 4000 Never Married White 5 5 Less tha… F 53: Never… 80 0 Widowed White 5 6 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 7 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 8 Bachelors M 31: Anima… 62 25480 Never Married White 1 9 Less tha… F 53: Never… 70 0 Widowed White 1 10 Bachelors F 41: Farmi… 53 6000 Married White 6 # … with 180,074 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% 
summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings wage_data %>% * filter(Occupation != "53: Never Worked" & * Earnings > 0) ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ``` # A tibble: 84,631 x 10 Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Less tha… F 39: Retai… 20 8000 Never Married White 5 3 Less tha… M 8: Comput… 16 4000 Never Married White 5 4 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 5 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 6 Bachelors M 31: Anima… 62 25480 Never Married White 1 7 Bachelors F 41: Farmi… 53 6000 Married White 6 8 Bachelors M 8: Comput… 52 70200 Married Asian 6 9 Less tha… F 41: Farmi… 16 10520 Never Married Asian 6 10 Some Col… F 30: Publi… 31 46000 Married White 4 # … with 84,621 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% * group_by(Sex) ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ``` # A tibble: 84,631 x 10 # Groups: Sex [2] Education Sex Occupation Age Earnings MaritalStatus Race FamilySize <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> 1 Bachelors M 40: Offic… 49 220000 Married White 5 2 Less tha… F 39: Retai… 20 8000 Never Married White 5 3 Less tha… M 8: Comput… 16 4000 Never Married White 5 4 Less tha… M 32: Chefs… 27 17350 Never Married Black 2 5 Less tha… M 33: Food … 24 12000 Never Married Hisp… 2 6 Bachelors M 31: Anima… 62 25480 Never Married White 1 7 Bachelors F 41: Farmi… 53 6000 Married White 6 8 Bachelors M 8: Comput… 52 70200 Married Asian 6 9 Less tha… F 41: Farmi… 16 10520 Never Married Asian 6 
10 Some Col… F 30: Publi… 31 46000 Married White 4 # … with 84,621 more rows, and 2 more variables: FamilyMakeup <chr>, # Age_squared <dbl> ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% group_by(Sex) %>% * summarise(mean = mean(Earnings), * count = n()) ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ``` # A tibble: 2 x 3 Sex mean count <chr> <dbl> <int> 1 F 42501. 41023 2 M 62235. 43608 ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% group_by(Sex) %>% summarise(mean = mean(Earnings), count = n()) -> *mean_earnings_by_sex ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ] --- count: false .panel1-the_chunk-auto[ ```r wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% summarise(mean(Earnings)) %>% as.numeric() -> mean_earnings #printing saved object mean_earnings wage_data %>% filter(Occupation != "53: Never Worked" & Earnings > 0) %>% group_by(Sex) %>% summarise(mean = mean(Earnings), count = n()) -> mean_earnings_by_sex *mean_earnings_by_sex ``` ] .panel2-the_chunk-auto[ ``` [1] 52669.56 ``` ``` # A tibble: 2 x 3 Sex mean count <chr> <dbl> <int> 1 F 42501. 41023 2 M 62235. 
43608 ``` ] <style> .panel1-the_chunk-auto { color: black; width: 49%; height: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-the_chunk-auto { color: black; width: 49%; height: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-the_chunk-auto { color: black; width: NA%; height: 33%; float: left; padding-left: 1%; font-size: 80% } </style> <style type="text/css"> .remark-code{line-height: 1.5; font-size: 90%} @media print { .has-continuation { display: block; } } code.r.hljs.remark-code{ position: relative; overflow-x: hidden; } code.r.hljs.remark-code:hover{ overflow-x:visible; width: 500px; border-style: solid; } </style>