plotting observations and linear model is fairly easy with existing ggplot2 code.
library(tidyverse)
anscombe %>%
ggplot() +
aes(x = x3, y = y3) +
geom_point() +
geom_smooth(method = lm,
se = F)
plotting the predicted values of the observations is a bit harder, but we can do it with pre calculation.
# calc fitted and add points at fitted
lm(formula = anscombe$y3 ~ anscombe$x3) ->
my_model
data.frame(x3 = anscombe$x3,
predicted_y3 = my_model$fitted.values) ->
fitted_data
anscombe %>%
ggplot() +
aes(x = x3, y = y3) +
geom_point() +
geom_smooth(method = lm,
se = F) +
geom_point(data = fitted_data,
aes(x = x3,
y = predicted_y3),
color = "blue") +
geom_text(data = fitted_data,
aes(x = x3,
y = predicted_y3,
label = round(predicted_y3,
digits = 1)
),
color = "midnightblue")
## `geom_smooth()` using formula 'y ~ x'
StatOlsfittedpoint <- ggplot2::ggproto(`_class` = "StatOlsfittedpoint",
`_inherit` = ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = function(data, scales) {
model <- lm(formula = data$y ~ data$x)
data.frame(x = data$x,
y = model$fitted.values)
}
)
geom_point_lm_fitted <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatOlsfittedpoint,
geom = ggplot2::GeomPoint,
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
library(ggplot2)
ggplot(cars) +
aes(x = speed, y = dist) +
geom_point() +
geom_smooth(method = lm) +
geom_point_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'
StatOlsfitted <- ggplot2::ggproto(`_class` = "StatOlsfitted",
`_inherit` = ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = function(data, scales) {
model <- lm(formula = data$y ~ data$x)
data.frame(x = data$x,
y = model$fitted.values,
label =
round(x = model$fitted.values,
digits = 2))
}
)
geom_text_lm_fitted <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatOlsfitted,
geom = ggplot2::GeomText,
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
library(ggplot2)
ggplot(cars) +
aes(x = speed, y = dist) +
geom_point() +
geom_smooth(method = lm) +
geom_text_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'
In the video, Thomas Lin Pederson advises pulling out the data transformation function.
# this function takes in a dataframe with columns named x and y
# and returns a data frame with columns named x y and label
# for the output, the x column is the same as the input
# the y column is computed based on a fit of the input x and y
# and the label is also computed
my_compute_group <- function(data, scales) {
model <- lm(formula = data$y ~ data$x)
data.frame(x = data$x,
y = model$fitted.values,
label =
round(x = model$fitted.values,
digits = 2))
}
# lets see how this function works...
cars %>%
transmute(x = speed,
y = dist) %>%
my_compute_group(data = .)
## x y label
## 1 4 -1.849460 -1.85
## 2 4 -1.849460 -1.85
## 3 7 9.947766 9.95
## 4 7 9.947766 9.95
## 5 8 13.880175 13.88
## 6 9 17.812584 17.81
## 7 10 21.744993 21.74
## 8 10 21.744993 21.74
## 9 10 21.744993 21.74
## 10 11 25.677401 25.68
## 11 11 25.677401 25.68
## 12 12 29.609810 29.61
## 13 12 29.609810 29.61
## 14 12 29.609810 29.61
## 15 12 29.609810 29.61
## 16 13 33.542219 33.54
## 17 13 33.542219 33.54
## 18 13 33.542219 33.54
## 19 13 33.542219 33.54
## 20 14 37.474628 37.47
## 21 14 37.474628 37.47
## 22 14 37.474628 37.47
## 23 14 37.474628 37.47
## 24 15 41.407036 41.41
## 25 15 41.407036 41.41
## 26 15 41.407036 41.41
## 27 16 45.339445 45.34
## 28 16 45.339445 45.34
## 29 17 49.271854 49.27
## 30 17 49.271854 49.27
## 31 17 49.271854 49.27
## 32 18 53.204263 53.20
## 33 18 53.204263 53.20
## 34 18 53.204263 53.20
## 35 18 53.204263 53.20
## 36 19 57.136672 57.14
## 37 19 57.136672 57.14
## 38 19 57.136672 57.14
## 39 20 61.069080 61.07
## 40 20 61.069080 61.07
## 41 20 61.069080 61.07
## 42 20 61.069080 61.07
## 43 20 61.069080 61.07
## 44 22 68.933898 68.93
## 45 23 72.866307 72.87
## 46 24 76.798715 76.80
## 47 24 76.798715 76.80
## 48 24 76.798715 76.80
## 49 24 76.798715 76.80
## 50 25 80.731124 80.73
StatOlsfitted <- ggplot2::ggproto(`_class` = "StatOlsfitted",
`_inherit` = ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = my_compute_group
)
geom_text_lm_fitted <- function(mapping = NULL, data = NULL,
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatOlsfitted,
geom = ggplot2::GeomText,
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
library(ggplot2)
ggplot(cars) +
aes(x = speed, y = dist) +
geom_point() +
geom_smooth(method = lm) +
geom_text_lm_fitted(color = "plum4")
## `geom_smooth()` using formula 'y ~ x'