
geom_smooth old hat

library(tidyverse, )
# Baseline: scatterplot with the familiar smoothing line on top.
# The final geom_smooth() layer is what emits the "`geom_smooth()` using ..."
# message shown below.
mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  geom_point() + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

A less common move: `stat_smooth(geom = "point", color = "blue")`

This draws the specific predictions behind the smooth line (by default, 80 evenly spaced x values between xmin and xmax) — a bit of under-the-hood thinking.

# Show the smoother's individual predictions as blue points rather than
# as the usual line.
ggplot(data = mtcars) +
  aes(x = wt, y = mpg) +
  stat_smooth(
    geom = "point",
    color = "blue"
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

almost surely new to you (and probably more interesting to stats instructors): predicting at observed values of x

The `xseq` argument is not advertised, but it is possibly of interest.

# Predict only at the observed x values, i.e. where the data actually
# has support, by handing those values to `xseq`.
ggplot(data = mtcars) +
  aes(x = wt, y = mpg) +
  stat_smooth(
    geom = "point",
    color = "blue",
    xseq = mtcars$wt
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Proposal: can we make this available in a slightly 'smoother' way?

Target code

# Target (aspirational) interface: geom_smooth_fit() is the proposed wrapper
# and is not defined in this document yet.
mtcars %>% 
  ggplot() + 
  aes(wt, mpg) + 
  geom_smooth_fit()

And also explicitly visualize error (residuals as segments)

# Target (aspirational) interface: geom_smooth_error() is the proposed wrapper
# that draws residuals as segments; it is not defined in this document yet.
last_plot() + 
  geom_smooth_error()
Details of implementation…

# Indirection around mgcv::gam. Referring to it from inside a function
# (rather than directly) is what silences the undeclared import warning.
gam_method <- function() {
  mgcv::gam
}
# Build a data frame from `...` without any column-name repair.
# With the tidyverse attached, a bare `data_frame()` resolves to tibble's
# deprecated function, which splices `...` and would therefore turn
# `.name_repair = "minimal"` into a column instead of treating it as an option.
# `tibble::tibble()` accepts `.name_repair` as a real argument.
data_frame0 <- function(...) tibble::tibble(..., .name_repair = "minimal")
# S3 generic: dispatches to a `predictdf.<class>` method for `model`.
predictdf <- function(model, xseq, se, level) {
  UseMethod("predictdf")
}

prediction mechanisms from ggplot2..

compute_group_smooth: verbatim from StatSmooth definition…

#' Fitted values and residual geometry at the observed x values
#'
#' Calls `compute_group_smooth()` with `xseq = data$x`, so the model is
#' evaluated at the observed x values, then adds the columns needed to draw
#' each residual as a segment (`xend`, `yend`) or as a squared-error rectangle
#' (`xmin`, `xmax`, `ymin`, `ymax`).
#'
#' @param data Data for one group; must contain columns `x` and `y`.
#' @param scales,method,formula,n,span,fullrange,level,method.args,na.rm,flipped_aes
#'   Forwarded unchanged to `compute_group_smooth()`.
#' @param se Accepted for signature compatibility but not used: the call
#'   always passes `se = FALSE`.
#' @return The result of `compute_group_smooth()` with the extra columns
#'   `xend`, `yend`, `ymin`, `xmin`, `ymax` and `xmax`.
compute_group_smooth_error <- function(data, scales, method = NULL, formula = NULL,
                           se = TRUE, n = 80, span = 0.75, fullrange = FALSE,
                           level = 0.95, method.args = list(),
                           na.rm = FALSE, flipped_aes = NA) {
  # NOTE(review): the mutate() below assumes compute_group_smooth() returns
  # one row per element of `xseq`, in the same order as `data` - confirm.
  compute_group_smooth(
    data = data, scales = scales,
    method = method, formula = formula,
    se = FALSE, n = n, span = span, fullrange = fullrange,
    xseq = data$x,
    level = level, method.args = method.args,
    na.rm = na.rm, flipped_aes = flipped_aes
  ) %>%
    mutate(
      # Segment from the fitted point (x, y) to the observed point.
      xend = data$x,
      yend = data$y,
      # Rectangle spanning fitted-to-observed vertically...
      ymin = y,
      xmin = x,
      ymax = yend,
      # ...and the same (signed) length horizontally, giving a square.
      xmax = x + (ymax - ymin)
    )
}

test compute group

# Smoke test: call the compute function directly on data whose columns are
# renamed to the aesthetic names (x, y) it expects.
mtcars %>% 
  rename(x = wt, y = mpg, cat = am) %>% 
  compute_group_smooth_error(method = lm, formula = y ~ x, n = 7)
#' Resolve default smoothing parameters before computation
#'
#' Records the layer orientation in `params$flipped_aes`, chooses a default
#' `method` and `formula` when none are supplied, and informs the user about
#' any defaults that were chosen.
#'
#' @param data Layer data; `group` and `PANEL` columns are read when the
#'   method has to be chosen automatically.
#' @param params List of layer parameters (`method`, `formula`, ...).
#' @return `params`, with `flipped_aes`, `method` and `formula` filled in.
setup_smooth <- function(data, params) {
  params$flipped_aes <- has_flipped_aes(data, params, ambiguous = TRUE)
  msg <- character()

  if (is.null(params$method) || identical(params$method, "auto")) {
    # Use loess for small datasets, gam with a cubic regression basis for
    # larger. Based on size of the _largest_ group to avoid bad memory
    # behaviour of loess
    max_group <- max(table(interaction(data$group, data$PANEL, drop = TRUE)))

    if (max_group < 1000) {
      params$method <- "loess"
    } else {
      params$method <- "gam"
    }
    msg <- c(msg, paste0("method = '", params$method, "'"))
  }

  if (is.null(params$formula)) {
    if (identical(params$method, "gam")) {
      params$formula <- y ~ s(x, bs = "cs")
    } else {
      params$formula <- y ~ x
    }
    msg <- c(msg, paste0("formula = '", deparse(params$formula), "'"))
  }

  # Swap the "gam" label for the actual fitting function.
  if (identical(params$method, "gam")) {
    params$method <- gam_method()
  }

  if (length(msg) > 0) {
    cli::cli_inform("{.fn geom_smooth} using {msg}")
  }

  # The resolved parameters must be returned for the stat to use them.
  params
}

# Stat that wires the parameter setup (`setup_smooth`) to the per-group
# computation (`compute_group_smooth`).
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatSmooth <- ggplot2::ggproto("StatSmooth", ggplot2::Stat,
  setup_params = setup_smooth,
  extra_params = c("na.rm", "orientation"),
  compute_group = compute_group_smooth,
  dropped_aes = c("weight"),
  required_aes = c("x", "y")
)

# Second name for StatSmooth.
# NOTE(review): ggproto objects have reference semantics, so this is presumably
# an alias rather than an independent copy - confirm before mutating it.
StatSmooth2 <- StatSmooth

# Stat that predicts at the observed x values and returns residual geometry.
# `compute_group` is set directly in the definition; a separate
# `StatSmoothError$compute_group <- ...` assignment placed before this
# definition would fail because the object does not exist yet.
StatSmoothError <- ggplot2::ggproto("StatSmoothError", ggplot2::Stat,
  setup_params = setup_smooth,
  extra_params = c("na.rm", "orientation"),
  compute_group = compute_group_smooth_error,
  dropped_aes = c("weight"),
  required_aes = c("x", "y")
)

#' Layer constructor for `StatSmooth`
#'
#' @param mapping,data,geom,position,show.legend,inherit.aes Passed to
#'   `ggplot2::layer()`.
#' @param ... Additional layer parameters (for example `color = "blue"` or
#'   `alpha = .2`), forwarded through `params`.
#' @param method,formula,se,n,span,fullrange,level,method.args,na.rm,orientation
#'   Forwarded to the stat through `params`.
#' @return The layer object created by `ggplot2::layer()`.
stat_smooth <- function(mapping = NULL, data = NULL,
                        geom = "smooth", position = "identity",
                        ...,
                        method = NULL,
                        formula = NULL,
                        se = TRUE,
                        n = 80,
                        span = 0.75,
                        fullrange = FALSE,
                        level = 0.95,
                        method.args = list(),
                        na.rm = FALSE,
                        orientation = NA,
                        show.legend = NA,
                        inherit.aes = TRUE) {
  ggplot2::layer(
    data = data,
    mapping = mapping,
    stat = StatSmooth,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = rlang::list2(
      method = method,
      formula = formula,
      se = se,
      n = n,
      fullrange = fullrange,
      level = level,
      na.rm = na.rm,
      orientation = orientation,
      method.args = method.args,
      span = span,
      ...
    )
  )
}

#' Layer constructor for `StatSmoothError` (fit at the observed x values)
#'
#' Pair with `geom = "point"` for fitted values, `geom = "segment"` for
#' residuals, or `geom = "rect"` for squared residuals.
#'
#' @param mapping,data,geom,position,show.legend,inherit.aes Passed to
#'   `ggplot2::layer()`.
#' @param ... Additional layer parameters (for example `color = "blue"` or
#'   `alpha = .1`), forwarded through `params`.
#' @param method,formula,se,n,span,fullrange,level,method.args,na.rm,orientation
#'   Forwarded to the stat through `params`.
#' @return The layer object created by `ggplot2::layer()`.
stat_smooth_obs <- function(mapping = NULL, data = NULL,
                        geom = "smooth", position = "identity",
                        ...,
                        method = NULL,
                        formula = NULL,
                        se = TRUE,
                        n = 80,
                        span = 0.75,
                        fullrange = FALSE,
                        level = 0.95,
                        method.args = list(),
                        na.rm = FALSE,
                        orientation = NA,
                        show.legend = NA,
                        inherit.aes = TRUE) {
  ggplot2::layer(
    data = data,
    mapping = mapping,
    stat = StatSmoothError,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = rlang::list2(
      method = method,
      formula = formula,
      se = se,
      n = n,
      fullrange = fullrange,
      level = level,
      na.rm = na.rm,
      orientation = orientation,
      method.args = method.args,
      span = span,
      ...
    )
  )
}

Basically working!

# Base plot for the demos that follow: raw points plus the smooth line,
# without the standard-error ribbon.
ggplot(data = mtcars) +
  aes(x = wt, y = mpg) +
  geom_point() +
  geom_smooth(se = FALSE, alpha = .2)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Add fitted values (blue points) and residuals (segments) to the last plot.
last_plot() +
  # wrap as geom_smooth_fit()
  stat_smooth_obs(color = "blue", geom = "point") +
  # geom_smooth_error()
  stat_smooth_obs(geom = "segment")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Even squared error (best with standardized variables via `stdz()` and with `coord_equal()`)

#' Standardize a numeric vector
#'
#' @param x A numeric vector.
#' @return `x` centred on its mean and divided by its standard deviation.
stdz <- function(x) {
  var_mean <- mean(x)
  var_sd <- sd(x)

  (x - var_mean) / var_sd
}

# Squared residuals drawn as rectangles. Both variables are standardized and
# the coordinates are fixed to equal scales so the rectangles render as squares.
last_plot() + 
  stat_smooth_obs(geom = "rect", alpha = .1)  + # geom_smooth_error_sq() +
  aes(stdz(wt), stdz(mpg)) + 
  coord_equal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same demonstration with a linear model: fitted line, fitted values at the
# observed x values, and residual segments.
ggplot(data = mtcars) +
  aes(x = wt, y = mpg) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE, alpha = .2) +
  # wrap as geom_smooth_fit()
  stat_smooth_obs(method = lm, geom = "point", color = "blue") +
  stat_smooth_obs(method = lm, geom = "segment")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
