library(tidyverse)

compute_panel_lm <- function(data, scales, formula = y ~ . ){
  
  # data prep
  data <- data %>% remove_missing()
  lmdata <- data %>% select(-PANEL, -group)
  
  # modeling
  lm <- lm(data = lmdata, formula = formula)
  data$model <- NA
  data$model[1] <- list(lm)
  
  # save attributes
  data$yend = data$y
  data$y = lm$fitted.values
  data$xend = data$x
  data$residuals <- lm$residuals
  
  data
  
}

compute_panel_lm_glance <- function(data, scales, formula = y ~ . ){
  
  # data prep
  data <- data %>% remove_missing()
  lmdata <- data %>% select(-PANEL, -group)
  
  # modeling
  lm <- lm(data = lmdata, formula = formula) %>% summary()

  data.frame(label = paste0("R-squared: \n", round(lm$r.squared, 3)),
             x = I(.15),
             y = I(.95))
  
  }

StatLmGlance <- ggproto("StatLmGlance", Stat, 
                  compute_panel = compute_panel_lm_glance)


compute_panel_lm_tidy <- function(data, scales, formula = y ~ . ){
  
  # data prep
  data <- data %>% remove_missing()
  lmdata <- data %>% select(-PANEL, -group)
  
  # modeling
  model <- lm(data = lmdata, formula = formula) %>% summary()

  model$coefficients %>% 
    data.frame() %>% 
    rownames_to_column(var = "variable") %>% 
    mutate(y = as.numeric(as.factor(variable)),
           x = Estimate) %>% 
    slice(-1)
  
}

StatLmTidy <- ggproto("StatLmTidy", Stat,
                      compute_panel = compute_panel_lm_tidy,
                      default_aes = aes(label = after_stat(variable),
                                        xend = after_stat(Estimate + Std..Error*1.96)))


StatLm <- ggproto("StatLm", Stat, 
                  compute_panel = compute_panel_lm)


stat_lm <- function (mapping = NULL, data = NULL, geom = "point", position = "identity", 
    ..., show.legend = NA, inherit.aes = TRUE) 
{
    layer(data = data, mapping = mapping, stat = StatLm, 
        geom = geom, position = position, show.legend = show.legend, 
        inherit.aes = inherit.aes, params = rlang::list2(na.rm = FALSE, 
            ...))
}

geom_lm <- stat_lm

return_model <- function(){layer_data(i = length(last_plot()$layers))$model[[1]] %>% summary}

stat_lm_empty <- function(...){stat_lm(formula = y ~ 1, ...)}

geom_lm_empty <- stat_lm_empty
# scatterplot
p0 <- palmerpenguins::penguins %>% 
  ggplot() + 
  aes(y = flipper_length_mm, x = bill_depth_mm) + 
  geom_point() ; p0

# adding bill_depth_mm
p0 + 
  aes(bill_depth_mm = bill_depth_mm) +
  geom_lm(alpha = .2) +
  geom_text(stat = StatLmGlance, size = 4) +
  stat_lm(geom = 'segment', alpha = .2)

return_model()
## 
## Call:
## lm(formula = formula, data = lmdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.2853  -7.7712  -0.6224   7.1894  28.4563 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   272.2190     5.4126   50.29   <2e-16 ***
## bill_depth_mm  -4.1574     0.3135  -13.26   <2e-16 ***
## x                   NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.43 on 340 degrees of freedom
## Multiple R-squared:  0.3409, Adjusted R-squared:  0.3389 
## F-statistic: 175.8 on 1 and 340 DF,  p-value: < 2.2e-16
# adding species
last_plot() + 
  aes(color = species) 

return_model()
## 
## Call:
## lm(formula = formula, data = lmdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.6864  -4.0225  -0.0292   3.7581  21.0470 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     136.9488     5.1843  26.416  < 2e-16 ***
## colourChinstrap   5.6554     0.8484   6.666 1.07e-10 ***
## colourGentoo     36.9531     1.1806  31.301  < 2e-16 ***
## bill_depth_mm     2.8891     0.2814  10.267  < 2e-16 ***
## x                     NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.807 on 338 degrees of freedom
## Multiple R-squared:  0.8309, Adjusted R-squared:  0.8294 
## F-statistic: 553.8 on 3 and 338 DF,  p-value: < 2.2e-16
# adding sex
last_plot() + 
  aes(shape = sex)

# adding island
last_plot() + 
  aes(island = island)

# adding wt
last_plot() + 
  aes(body_mass_g = body_mass_g)

return_model()
## 
## Call:
## lm(formula = formula, data = lmdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.023  -3.380  -0.151   3.054  16.413 
## 
## Coefficients: (1 not defined because of singularities)
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.488e+02  6.396e+00  23.272  < 2e-16 ***
## body_mass_g     5.867e-03  9.516e-04   6.166 2.08e-09 ***
## islandDream     1.291e+00  1.066e+00   1.211  0.22687    
## islandTorgersen 2.683e+00  1.105e+00   2.428  0.01571 *  
## shapemale       1.513e+00  9.216e-01   1.642  0.10152    
## colourChinstrap 5.552e+00  9.572e-01   5.801 1.57e-08 ***
## colourGentoo    2.351e+01  2.225e+00  10.565  < 2e-16 ***
## bill_depth_mm   9.492e-01  3.629e-01   2.616  0.00932 ** 
## x                      NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.264 on 325 degrees of freedom
## Multiple R-squared:  0.8619, Adjusted R-squared:  0.8589 
## F-statistic: 289.8 on 7 and 325 DF,  p-value: < 2.2e-16
# empty model
p0 +
  geom_lm_empty(alpha = .2) +
  stat_lm_empty(geom = 'segment', alpha = .2)  

return_model()
## 
## Call:
## lm(formula = formula, data = lmdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.915 -10.915  -3.915  12.085  30.085 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 200.9152     0.7604   264.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.06 on 341 degrees of freedom
palmerpenguins::penguins %>% 
  head(2)
## # A tibble: 2 × 8
##   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
## 1 Adelie  Torgersen           39.1          18.7               181        3750
## 2 Adelie  Torgersen           39.5          17.4               186        3800
## # ℹ 2 more variables: sex <fct>, year <int>
ggplot(palmerpenguins::penguins) + 
  aes(y = flipper_length_mm, x = bill_depth_mm) + 
  geom_point() +
  aes(bill_depth_mm = bill_depth_mm) +
  geom_lm(alpha = .2) +
  geom_text(stat = StatLmGlance, size = 5) +
  stat_lm(geom = 'segment', alpha = .2) +
  aes(color = species) + 
  aes(shape = sex) + 
  aes(island = island) + 
  aes(body_mass_g = body_mass_g)
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggwipe::last_plot_wipe() +
  geom_point(stat = StatLmTidy) + 
  geom_text(stat = StatLmTidy, vjust = 1) + 
  geom_segment(stat = StatLmTidy) + 
  geom_segment(stat = StatLmTidy, 
               aes(x = after_stat(Estimate - Std..Error*1.96))) + 
  labs(x = "Estimate") + 
  labs(y = "Variable")
## Warning: Removed 11 rows containing missing values or values outside the scale
## range.
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.

layer_data()
## Warning: Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
## Removed 11 rows containing missing values or values outside the scale range.
##             label         xend PANEL            x y        variable
## 1     body_mass_g  0.007732147     1  0.005867073 3     body_mass_g
## 2     islandDream  3.379744972     1  1.290556876 6     islandDream
## 3 islandTorgersen  4.848164813     1  2.682784849 7 islandTorgersen
## 4       shapemale  3.319878460     1  1.513479103 8       shapemale
## 5 colourChinstrap  7.428611386     1  5.552470550 4 colourChinstrap
## 6    colourGentoo 27.871750655     1 23.510331866 5    colourGentoo
## 7   bill_depth_mm  1.660482326     1  0.949200783 2   bill_depth_mm
##       Estimate   Std..Error   t.value     Pr...t.. shape colour size fill alpha
## 1  0.005867073 0.0009515682  6.165689 2.080233e-09    19  black  1.5   NA    NA
## 2  1.290556876 1.0659122938  1.210753 2.268697e-01    19  black  1.5   NA    NA
## 3  2.682784849 1.1047856957  2.428331 1.571075e-02    19  black  1.5   NA    NA
## 4  1.513479103 0.9216323248  1.642172 1.015215e-01    19  black  1.5   NA    NA
## 5  5.552470550 0.9572147121  5.800653 1.566870e-08    19  black  1.5   NA    NA
## 6 23.510331866 2.2252136676 10.565427 1.261605e-22    19  black  1.5   NA    NA
## 7  0.949200783 0.3628987468  2.615608 9.322368e-03    19  black  1.5   NA    NA
##   stroke
## 1    0.5
## 2    0.5
## 3    0.5
## 4    0.5
## 5    0.5
## 6    0.5
## 7    0.5

https://github.com/tidyverse/datascience-box/blob/main/course-materials/hw-instructions/hw-07/hw-07-bike-rentals-dc.Rmd

library(dsbox)
dcbikeshare %>%
  mutate(atemp_raw = atemp * 50) %>%
  ggplot(mapping = aes(x = dteday, y = cnt)) +
    geom_point() +
    labs(
      title = "Bike rentals in DC, 2011 and 2012",
      subtitle = "Warmer temperatures associated with more bike rentals",
      x = "Date",
      y = "Bike renrals",
      color = "Temperature (C)"
    ) +
  geom_lm(alpha = .2) +
  geom_text(stat = StatLmGlance, size = 5) +
  stat_lm(geom = 'segment', alpha = .2) + 
  aes(color = atemp_raw) + 
  theme_minimal() + 
  aes(shape = as.factor(season)) + 
  aes(season = as.factor(season)) +
  aes(holiday = holiday) +
  aes(workingday = workingday) +
  aes(dteday = dteday) +
  aes(atemp_raw = atemp_raw)

# ggwipe::last_plot_wipe() +
#   geom_point(stat = StatLmTidy) + 
#   geom_text(stat = StatLmTidy, vjust = 1) + 
#   geom_segment(stat = StatLmTidy) + 
#   geom_segment(stat = StatLmTidy, 
#                aes(x = after_stat(Estimate - Std..Error*1.96))) + 
#   labs(x = "Estimate") + 
#   labs(y = "Variable")

return_model()
## 
## Call:
## lm(formula = formula, data = lmdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5795.5  -472.0   171.1   632.6  3132.5 
## 
## Coefficients: (5 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -77626.519   3056.175 -25.400  < 2e-16 ***
## x                5.150      0.201  25.621  < 2e-16 ***
## atemp_raw      116.598      8.196  14.226  < 2e-16 ***
## dteday              NA         NA      NA       NA    
## workingday      90.943     85.538   1.063   0.2880    
## holiday       -591.033    237.928  -2.484   0.0132 *  
## season2        836.933    141.936   5.897  5.7e-09 ***
## season3        219.136    182.799   1.199   0.2310    
## season4        241.978    127.043   1.905   0.0572 .  
## shape2              NA         NA      NA       NA    
## shape3              NA         NA      NA       NA    
## shape4              NA         NA      NA       NA    
## colour              NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1038 on 723 degrees of freedom
## Multiple R-squared:  0.7154, Adjusted R-squared:  0.7126 
## F-statistic: 259.6 on 7 and 723 DF,  p-value: < 2.2e-16
knitr::knit_exit()