existing ggplot2

plotting observations and linear model is fairly easy with existing ggplot2 code.

library(tidyverse)
anscombe %>% 
  ggplot() + 
  aes(x = x3, y = y3) + 
  geom_point() +
  geom_smooth(method = lm,
              se = F)

plotting predictions

plotting the predicted values of the observations is a bit harder, but we can do it with pre calculation.

# calc fitted and add points at fitted
lm(formula = anscombe$y3 ~ anscombe$x3) ->
  my_model

data.frame(x3 = anscombe$x3, 
           predicted_y3 = my_model$fitted.values) ->
  fitted_data

anscombe %>% 
  ggplot() + 
  aes(x = x3, y = y3) + 
  geom_point() + 
  geom_smooth(method = lm, 
              se = F) + 
  geom_point(data = fitted_data,
             aes(x = x3, 
                 y = predicted_y3),
             color = "blue") +
  geom_text(data = fitted_data,
             aes(x = x3, 
                 y = predicted_y3,
                 label = round(predicted_y3, 
                               digits = 1)
                 ),
             color = "midnightblue") 
## `geom_smooth()` using formula 'y ~ x'

Or extend ggplot2

StatOlsfittedpoint <- ggplot2::ggproto(`_class` = "StatOlsfittedpoint",
                                  `_inherit` = ggplot2::Stat,
                                  required_aes = c("x", "y"),
                                  compute_group = function(data, scales) {
                                    
                                    model <- lm(formula = data$y ~ data$x)
                                  
                                    data.frame(x = data$x,
                                               y = model$fitted.values)
                                  }

)


geom_point_lm_fitted <- function(mapping = NULL, data = NULL,
                           position = "identity", na.rm = FALSE,
                           show.legend = NA,
                           inherit.aes = TRUE, ...) {
  ggplot2::layer(
    stat = StatOlsfittedpoint, 
    geom = ggplot2::GeomPoint, 
    data = data, 
    mapping = mapping,
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes,
    params = list(na.rm = na.rm, ...)
  )
}
library(ggplot2)
ggplot(cars) + 
  aes(x = speed, y = dist) +
  geom_point() + 
  geom_smooth(method = lm) + 
  geom_point_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'

StatOlsfitted <- ggplot2::ggproto(`_class` = "StatOlsfitted",
                                  `_inherit` = ggplot2::Stat,
                                  required_aes = c("x", "y"),
                                  compute_group = function(data, scales) {
                                    
                                    model <- lm(formula = data$y ~ data$x)
                                  
                                    data.frame(x = data$x,
                                               y = model$fitted.values,
                                               label = 
                                                 round(x = model$fitted.values,
                                                       digits = 2))
                                  }

)


geom_text_lm_fitted <- function(mapping = NULL, data = NULL,
                           position = "identity", na.rm = FALSE,
                           show.legend = NA,
                           inherit.aes = TRUE, ...) {
  ggplot2::layer(
    stat = StatOlsfitted, 
    geom = ggplot2::GeomText, 
    data = data, 
    mapping = mapping,
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes,
    params = list(na.rm = na.rm, ...)
  )
}
library(ggplot2)
ggplot(cars) + 
  aes(x = speed, y = dist) +
  geom_point() + 
  geom_smooth(method = lm) + 
  geom_text_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'

Finally extension following TLP’s advise - write out the data transformation function separately

In the video, Thomas Lin Pederson advises pulling out the data transformation function.

# this function takes in a dataframe with columns named x and y
# and returns a data frame with columns named x y and label
# for the output, the x column is the same as the input 
# the y column is computed based on a fit of the input x and y
# and the label is also computed 
my_compute_group <- function(data, scales) {
                                    
                                    model <- lm(formula = data$y ~ data$x)
                                  
                                    data.frame(x = data$x,
                                               y = model$fitted.values,
                                               label = 
                                                 round(x = model$fitted.values,
                                                       digits = 2))
}

# lets see how this function works...
cars %>% 
  transmute(x = speed,
            y = dist) %>% 
my_compute_group(data = .)
##     x         y label
## 1   4 -1.849460 -1.85
## 2   4 -1.849460 -1.85
## 3   7  9.947766  9.95
## 4   7  9.947766  9.95
## 5   8 13.880175 13.88
## 6   9 17.812584 17.81
## 7  10 21.744993 21.74
## 8  10 21.744993 21.74
## 9  10 21.744993 21.74
## 10 11 25.677401 25.68
## 11 11 25.677401 25.68
## 12 12 29.609810 29.61
## 13 12 29.609810 29.61
## 14 12 29.609810 29.61
## 15 12 29.609810 29.61
## 16 13 33.542219 33.54
## 17 13 33.542219 33.54
## 18 13 33.542219 33.54
## 19 13 33.542219 33.54
## 20 14 37.474628 37.47
## 21 14 37.474628 37.47
## 22 14 37.474628 37.47
## 23 14 37.474628 37.47
## 24 15 41.407036 41.41
## 25 15 41.407036 41.41
## 26 15 41.407036 41.41
## 27 16 45.339445 45.34
## 28 16 45.339445 45.34
## 29 17 49.271854 49.27
## 30 17 49.271854 49.27
## 31 17 49.271854 49.27
## 32 18 53.204263 53.20
## 33 18 53.204263 53.20
## 34 18 53.204263 53.20
## 35 18 53.204263 53.20
## 36 19 57.136672 57.14
## 37 19 57.136672 57.14
## 38 19 57.136672 57.14
## 39 20 61.069080 61.07
## 40 20 61.069080 61.07
## 41 20 61.069080 61.07
## 42 20 61.069080 61.07
## 43 20 61.069080 61.07
## 44 22 68.933898 68.93
## 45 23 72.866307 72.87
## 46 24 76.798715 76.80
## 47 24 76.798715 76.80
## 48 24 76.798715 76.80
## 49 24 76.798715 76.80
## 50 25 80.731124 80.73
StatOlsfitted <- ggplot2::ggproto(`_class` = "StatOlsfitted",
                                  `_inherit` = ggplot2::Stat,
                                  required_aes = c("x", "y"),
                                  compute_group = my_compute_group
)


geom_text_lm_fitted <- function(mapping = NULL, data = NULL,
                           position = "identity", na.rm = FALSE,
                           show.legend = NA,
                           inherit.aes = TRUE, ...) {
  ggplot2::layer(
    stat = StatOlsfitted, 
    geom = ggplot2::GeomText, 
    data = data, 
    mapping = mapping,
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes,
    params = list(na.rm = na.rm, ...)
  )
}
library(ggplot2)
ggplot(cars) + 
  aes(x = speed, y = dist) +
  geom_point() + 
  geom_smooth(method = lm) + 
  geom_text_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'