library(tidyverse)
compute_panel_lm <- function(data, scales, formula = y ~ . ){
# data prep
data <- data %>% remove_missing()
lmdata <- data %>% select(-PANEL, -group)
# modeling
lm <- lm(data = lmdata, formula = formula)
data$model <- NA
data$model[1] <- list(lm)
# save attributes
data$yend = data$y
data$y = lm$fitted.values
data$xend = data$x
data$residuals <- lm$residuals
data
}
compute_panel_lm_glance <- function(data, scales, formula = y ~ . ){
# data prep
data <- data %>% remove_missing()
lmdata <- data %>% select(-PANEL, -group)
# modeling
lm <- lm(data = lmdata, formula = formula) %>% summary()
data.frame(label = paste0("R-squared: \n", round(lm$r.squared, 3)),
x = I(.15),
y = I(.95))
}
StatLmGlance <- ggproto("StatLmGlance", Stat,
compute_panel = compute_panel_lm_glance)
compute_panel_lm_tidy <- function(data, scales, formula = y ~ . ){
# data prep
data <- data %>% remove_missing()
lmdata <- data %>% select(-PANEL, -group)
# modeling
model <- lm(data = lmdata, formula = formula) %>% summary()
model$coefficients %>%
data.frame() %>%
rownames_to_column(var = "variable") %>%
mutate(y = as.numeric(as.factor(variable)),
x = Estimate) %>%
slice(-1)
}
StatLmTidy <- ggproto("StatLmTidy", Stat,
compute_panel = compute_panel_lm_tidy,
default_aes = aes(label = after_stat(variable),
xend = after_stat(Estimate + Std..Error*1.96)))
StatLm <- ggproto("StatLm", Stat,
compute_panel = compute_panel_lm)
stat_lm <- function (mapping = NULL, data = NULL, geom = "point", position = "identity",
..., show.legend = NA, inherit.aes = TRUE)
{
layer(data = data, mapping = mapping, stat = StatLm,
geom = geom, position = position, show.legend = show.legend,
inherit.aes = inherit.aes, params = rlang::list2(na.rm = FALSE,
...))
}
geom_lm <- stat_lm
return_model <- function(){layer_data(i = length(last_plot()$layers))$model[[1]] %>% summary}
stat_lm_empty <- function(...){stat_lm(formula = y ~ 1, ...)}
geom_lm_empty <- stat_lm_empty
# scatterplot
p0 <- palmerpenguins::penguins %>%
ggplot() +
aes(y = flipper_length_mm, x = bill_depth_mm) +
geom_point() ; p0
# adding bill_depth_mm
p0 +
aes(bill_depth_mm = bill_depth_mm) +
geom_lm(alpha = .2) +
geom_text(stat = StatLmGlance, size = 4) +
stat_lm(geom = 'segment', alpha = .2)
return_model()
##
## Call:
## lm(formula = formula, data = lmdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.2853 -7.7712 -0.6224 7.1894 28.4563
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 272.2190 5.4126 50.29 <2e-16 ***
## bill_depth_mm -4.1574 0.3135 -13.26 <2e-16 ***
## x NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.43 on 340 degrees of freedom
## Multiple R-squared: 0.3409, Adjusted R-squared: 0.3389
## F-statistic: 175.8 on 1 and 340 DF, p-value: < 2.2e-16
# adding species
last_plot() +
aes(color = species)
return_model()
##
## Call:
## lm(formula = formula, data = lmdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.6864 -4.0225 -0.0292 3.7581 21.0470
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 136.9488 5.1843 26.416 < 2e-16 ***
## colourChinstrap 5.6554 0.8484 6.666 1.07e-10 ***
## colourGentoo 36.9531 1.1806 31.301 < 2e-16 ***
## bill_depth_mm 2.8891 0.2814 10.267 < 2e-16 ***
## x NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.807 on 338 degrees of freedom
## Multiple R-squared: 0.8309, Adjusted R-squared: 0.8294
## F-statistic: 553.8 on 3 and 338 DF, p-value: < 2.2e-16
# adding sex
last_plot() +
aes(shape = sex)
# adding island
last_plot() +
aes(island = island)
# adding wt
last_plot() +
aes(body_mass_g = body_mass_g)
return_model()
##
## Call:
## lm(formula = formula, data = lmdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.023 -3.380 -0.151 3.054 16.413
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.488e+02 6.396e+00 23.272 < 2e-16 ***
## body_mass_g 5.867e-03 9.516e-04 6.166 2.08e-09 ***
## islandDream 1.291e+00 1.066e+00 1.211 0.22687
## islandTorgersen 2.683e+00 1.105e+00 2.428 0.01571 *
## shapemale 1.513e+00 9.216e-01 1.642 0.10152
## colourChinstrap 5.552e+00 9.572e-01 5.801 1.57e-08 ***
## colourGentoo 2.351e+01 2.225e+00 10.565 < 2e-16 ***
## bill_depth_mm 9.492e-01 3.629e-01 2.616 0.00932 **
## x NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.264 on 325 degrees of freedom
## Multiple R-squared: 0.8619, Adjusted R-squared: 0.8589
## F-statistic: 289.8 on 7 and 325 DF, p-value: < 2.2e-16
# empty model
p0 +
geom_lm_empty(alpha = .2) +
stat_lm_empty(geom = 'segment', alpha = .2)
return_model()
##
## Call:
## lm(formula = formula, data = lmdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.915 -10.915 -3.915 12.085 30.085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 200.9152 0.7604 264.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.06 on 341 degrees of freedom
palmerpenguins::penguins %>%
head(2)
## # A tibble: 2 × 8
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## <fct> <fct> <dbl> <dbl> <int> <int>
## 1 Adelie Torgersen 39.1 18.7 181 3750
## 2 Adelie Torgersen 39.5 17.4 186 3800
## # ℹ 2 more variables: sex <fct>, year <int>
ggplot(palmerpenguins::penguins) +
aes(y = flipper_length_mm, x = bill_depth_mm) +
geom_point() +
aes(bill_depth_mm = bill_depth_mm) +
geom_lm(alpha = .2) +
geom_text(stat = StatLmGlance, size = 5) +
stat_lm(geom = 'segment', alpha = .2) +
aes(color = species) +
aes(shape = sex) +
aes(island = island) +
aes(body_mass_g = body_mass_g)
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggwipe::last_plot_wipe() +
geom_point(stat = StatLmTidy) +
geom_text(stat = StatLmTidy, vjust = 1) +
geom_segment(stat = StatLmTidy) +
geom_segment(stat = StatLmTidy,
aes(x = after_stat(Estimate - Std..Error*1.96))) +
labs(x = "Estimate") +
labs(y = "Variable")
## Warning: Removed 11 rows containing missing values or values outside the scale
## range.
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
layer_data()
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## label xend PANEL x y variable
## 1 body_mass_g 0.007732147 1 0.005867073 3 body_mass_g
## 2 islandDream 3.379744972 1 1.290556876 6 islandDream
## 3 islandTorgersen 4.848164813 1 2.682784849 7 islandTorgersen
## 4 shapemale 3.319878460 1 1.513479103 8 shapemale
## 5 colourChinstrap 7.428611386 1 5.552470550 4 colourChinstrap
## 6 colourGentoo 27.871750655 1 23.510331866 5 colourGentoo
## 7 bill_depth_mm 1.660482326 1 0.949200783 2 bill_depth_mm
## Estimate Std..Error t.value Pr...t.. shape colour size fill alpha
## 1 0.005867073 0.0009515682 6.165689 2.080233e-09 19 black 1.5 NA NA
## 2 1.290556876 1.0659122938 1.210753 2.268697e-01 19 black 1.5 NA NA
## 3 2.682784849 1.1047856957 2.428331 1.571075e-02 19 black 1.5 NA NA
## 4 1.513479103 0.9216323248 1.642172 1.015215e-01 19 black 1.5 NA NA
## 5 5.552470550 0.9572147121 5.800653 1.566870e-08 19 black 1.5 NA NA
## 6 23.510331866 2.2252136676 10.565427 1.261605e-22 19 black 1.5 NA NA
## 7 0.949200783 0.3628987468 2.615608 9.322368e-03 19 black 1.5 NA NA
## stroke
## 1 0.5
## 2 0.5
## 3 0.5
## 4 0.5
## 5 0.5
## 6 0.5
## 7 0.5
https://github.com/tidyverse/datascience-box/blob/main/course-materials/hw-instructions/hw-07/hw-07-bike-rentals-dc.Rmd
library(dsbox)
dcbikeshare %>%
mutate(atemp_raw = atemp * 50) %>%
ggplot(mapping = aes(x = dteday, y = cnt)) +
geom_point() +
labs(
title = "Bike rentals in DC, 2011 and 2012",
subtitle = "Warmer temperatures associated with more bike rentals",
x = "Date",
y = "Bike renrals",
color = "Temperature (C)"
) +
geom_lm(alpha = .2) +
geom_text(stat = StatLmGlance, size = 5) +
stat_lm(geom = 'segment', alpha = .2) +
aes(color = atemp_raw) +
theme_minimal() +
aes(shape = as.factor(season)) +
aes(season = as.factor(season)) +
aes(holiday = holiday) +
aes(workingday = workingday) +
aes(dteday = dteday) +
aes(atemp_raw = atemp_raw)
# ggwipe::last_plot_wipe() +
# geom_point(stat = StatLmTidy) +
# geom_text(stat = StatLmTidy, vjust = 1) +
# geom_segment(stat = StatLmTidy) +
# geom_segment(stat = StatLmTidy,
# aes(x = after_stat(Estimate - Std..Error*1.96))) +
# labs(x = "Estimate") +
# labs(y = "Variable")
return_model()
##
## Call:
## lm(formula = formula, data = lmdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5795.5 -472.0 171.1 632.6 3132.5
##
## Coefficients: (5 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -77626.519 3056.175 -25.400 < 2e-16 ***
## x 5.150 0.201 25.621 < 2e-16 ***
## atemp_raw 116.598 8.196 14.226 < 2e-16 ***
## dteday NA NA NA NA
## workingday 90.943 85.538 1.063 0.2880
## holiday -591.033 237.928 -2.484 0.0132 *
## season2 836.933 141.936 5.897 5.7e-09 ***
## season3 219.136 182.799 1.199 0.2310
## season4 241.978 127.043 1.905 0.0572 .
## shape2 NA NA NA NA
## shape3 NA NA NA NA
## shape4 NA NA NA NA
## colour NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1038 on 723 degrees of freedom
## Multiple R-squared: 0.7154, Adjusted R-squared: 0.7126
## F-statistic: 259.6 on 7 and 723 DF, p-value: < 2.2e-16
knitr::knit_exit()