Background

geom_smooth old hat

library(tidyverse, )
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.0
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  geom_point() + 
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

a less common move: stat_smooth(geom = “point”, color = “blue”)

Specific predictions (80 evenly spaced between xmin and xmax). A bit of under-the-hood thinking

mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  stat_smooth(geom = "point", color = "blue")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

almost surely new to you (and probably more interesting to stats instructors): predicting at observed values of x

xseq is not advertised, but possibly of interest.. https://ggplot2.tidyverse.org/reference/geom_smooth.html

# fit where the support is in the data... 
mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  stat_smooth(geom = "point", 
              color = "blue", 
              xseq = mtcars$wt)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Proposal: Can we make this available in a bit ‘smoother’ way?

Target code

mtcars %>% 
  ggplot() + 
  aes(wt, mpg) + 
  geom_smooth_fit() + 
  geom_smooth_error() 

And also explicitly visualize error (residuals as segments)

last_plot() + 
  geom_smooth_error() 
## Error in geom_smooth_error(): could not find function "geom_smooth_error"

Details of implementation…

# This function exists to silence an undeclared import warning
gam_method <- function() mgcv::gam
data_frame0 <- function(...) data_frame(..., .name_repair = "minimal")
predictdf <- function(model, xseq, se, level) UseMethod("predictdf")

prediction mechanisms from ggplot2..

compute_group_smooth: verbatim from StatSmooth definition…

compute_group_smooth_error <- function(data, scales, method = NULL, formula = NULL,
                           se = TRUE, n = 80, span = 0.75, fullrange = FALSE,
                           level = 0.95, method.args = list(),
                           na.rm = FALSE, flipped_aes = NA){
  
  
compute_group_smooth(data = data, scales = scales, 
                       method = method, formula = formula, 
                       se = FALSE, n= n, span = span, fullrange = fullrange,
                       xseq = data$x, 
                       level = .95, method.args = method.args, 
                       na.rm = na.rm, flipped_aes = flipped_aes) %>% 
    mutate(xend = data$x,
           yend = data$y,
           ymin = y,
           xmin = x,
           ymax = yend,
           xmax = x + (ymax - ymin)
           )
  
}

test compute group

library(dplyr)
mtcars %>%
  rename(x = wt, y = mpg, cat = am) %>%
  compute_group_smooth(method = lm, formula = y ~ x)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##           x         y      ymin     ymax        se flipped_aes
## 1  1.513000 29.198941 26.963760 31.43412 1.0944578          NA
## 2  1.562506 28.934356 26.748212 31.12050 1.0704467          NA
## 3  1.612013 28.669770 26.532294 30.80725 1.0466168          NA
## 4  1.661519 28.405185 26.315980 30.49439 1.0229807          NA
## 5  1.711025 28.140600 26.099242 30.18196 0.9995522          NA
## 6  1.760532 27.876015 25.882050 29.86998 0.9763462          NA
## 7  1.810038 27.611430 25.664370 29.55849 0.9533790          NA
## 8  1.859544 27.346844 25.446166 29.24752 0.9306683          NA
## 9  1.909051 27.082259 25.227400 28.93712 0.9082332          NA
## 10 1.958557 26.817674 25.008027 28.62732 0.8860947          NA
## 11 2.008063 26.553089 24.788003 28.31818 0.8642757          NA
## 12 2.057570 26.288504 24.567275 28.00973 0.8428009          NA
## 13 2.107076 26.023919 24.345789 27.70205 0.8216973          NA
## 14 2.156582 25.759333 24.123485 27.39518 0.8009943          NA
## 15 2.206089 25.494748 23.900298 27.08920 0.7807237          NA
## 16 2.255595 25.230163 23.676157 26.78417 0.7609202          NA
## 17 2.305101 24.965578 23.450986 26.48017 0.7416210          NA
## 18 2.354608 24.700993 23.224702 26.17728 0.7228666          NA
## 19 2.404114 24.436408 22.997217 25.87560 0.7047005          NA
## 20 2.453620 24.171822 22.768435 25.57521 0.6871694          NA
## 21 2.503127 23.907237 22.538255 25.27622 0.6703230          NA
## 22 2.552633 23.642652 22.306568 24.97874 0.6542143          NA
## 23 2.602139 23.378067 22.073261 24.68287 0.6388992          NA
## 24 2.651646 23.113482 21.838214 24.38875 0.6244358          NA
## 25 2.701152 22.848897 21.601303 24.09649 0.6108849          NA
## 26 2.750658 22.584311 21.362403 23.80622 0.5983083          NA
## 27 2.800165 22.319726 21.121385 23.51807 0.5867688          NA
## 28 2.849671 22.055141 20.878121 23.23216 0.5763286          NA
## 29 2.899177 21.790556 20.632489 22.94862 0.5670484          NA
## 30 2.948684 21.525971 20.384369 22.66757 0.5589861          NA
## 31 2.998190 21.261386 20.133653 22.38912 0.5521949          NA
## 32 3.047696 20.996800 19.880245 22.11336 0.5467223          NA
## 33 3.097203 20.732215 19.624062 21.84037 0.5426081          NA
## 34 3.146709 20.467630 19.365041 21.57022 0.5398835          NA
## 35 3.196215 20.203045 19.103139 21.30295 0.5385694          NA
## 36 3.245722 19.938460 18.838336 21.03858 0.5386762          NA
## 37 3.295228 19.673875 18.570633 20.77712 0.5402031          NA
## 38 3.344734 19.409289 18.300053 20.51853 0.5431381          NA
## 39 3.394241 19.144704 18.026645 20.26276 0.5474586          NA
## 40 3.443747 18.880119 17.750473 20.00977 0.5531320          NA
## 41 3.493253 18.615534 17.471622 19.75945 0.5601173          NA
## 42 3.542759 18.350949 17.190190 19.51171 0.5683661          NA
## 43 3.592266 18.086364 16.906289 19.26644 0.5778243          NA
## 44 3.641772 17.821778 16.620037 19.02352 0.5884336          NA
## 45 3.691278 17.557193 16.331558 18.78283 0.6001329          NA
## 46 3.740785 17.292608 16.040981 18.54423 0.6128598          NA
## 47 3.790291 17.028023 15.748434 18.30761 0.6265517          NA
## 48 3.839797 16.763438 15.454041 18.07283 0.6411468          NA
## 49 3.889304 16.498853 15.157927 17.83978 0.6565849          NA
## 50 3.938810 16.234267 14.860210 17.60832 0.6728079          NA
## 51 3.988316 15.969682 14.561004 17.37836 0.6897604          NA
## 52 4.037823 15.705097 14.260414 17.14978 0.7073900          NA
## 53 4.087329 15.440512 13.958542 16.92248 0.7256474          NA
## 54 4.136835 15.175927 13.655483 16.69637 0.7444863          NA
## 55 4.186342 14.911342 13.351324 16.47136 0.7638638          NA
## 56 4.235848 14.646756 13.046146 16.24737 0.7837398          NA
## 57 4.285354 14.382171 12.740026 16.02432 0.8040775          NA
## 58 4.334861 14.117586 12.433033 15.80214 0.8248426          NA
## 59 4.384367 13.853001 12.125231 15.58077 0.8460038          NA
## 60 4.433873 13.588416 11.816679 15.36015 0.8675320          NA
## 61 4.483380 13.323831 11.507432 15.14023 0.8894005          NA
## 62 4.532886 13.059245 11.197541 14.92095 0.9115849          NA
## 63 4.582392 12.794660 10.887050 14.70227 0.9340627          NA
## 64 4.631899 12.530075 10.576002 14.48415 0.9568132          NA
## 65 4.681405 12.265490 10.264436 14.26654 0.9798174          NA
## 66 4.730911 12.000905  9.952387 14.04942 1.0030578          NA
## 67 4.780418 11.736320  9.639889 13.83275 1.0265184          NA
## 68 4.829924 11.471734  9.326972 13.61650 1.0501844          NA
## 69 4.879430 11.207149  9.013662 13.40064 1.0740423          NA
## 70 4.928937 10.942564  8.699986 13.18514 1.0980796          NA
## 71 4.978443 10.677979  8.385968 12.96999 1.1222847          NA
## 72 5.027949 10.413394  8.071628 12.75516 1.1466470          NA
## 73 5.077456 10.148809  7.756987 12.54063 1.1711567          NA
## 74 5.126962  9.884223  7.442064 12.32638 1.1958047          NA
## 75 5.176468  9.619638  7.126876 12.11240 1.2205827          NA
## 76 5.225975  9.355053  6.811438 11.89867 1.2454828          NA
## 77 5.275481  9.090468  6.495765 11.68517 1.2704980          NA
## 78 5.324987  8.825883  6.179871 11.47189 1.2956215          NA
## 79 5.374494  8.561298  5.863768 11.25883 1.3208472          NA
## 80 5.424000  8.296712  5.547468 11.04596 1.3461693          NA
mtcars %>% 
  rename(x = wt, y = mpg, cat = am) %>% 
  compute_group_smooth_error(method = lm, formula = y ~ x, n = 7)
##        x         y flipped_aes  xend yend      ymin  xmin ymax        xmax
## 1  2.620 23.282611          NA 2.620 21.0 23.282611 2.620 21.0  0.33738935
## 2  2.875 21.919770          NA 2.875 21.0 21.919770 2.875 21.0  1.95522960
## 3  2.320 24.885952          NA 2.320 22.8 24.885952 2.320 22.8  0.23404788
## 4  3.215 20.102650          NA 3.215 21.4 20.102650 3.215 21.4  4.51234994
## 5  3.440 18.900144          NA 3.440 18.7 18.900144 3.440 18.7  3.23985604
## 6  3.460 18.793255          NA 3.460 18.1 18.793255 3.460 18.1  2.76674547
## 7  3.570 18.205363          NA 3.570 14.3 18.205363 3.570 14.3 -0.33536265
## 8  3.190 20.236262          NA 3.190 24.4 20.236262 3.190 24.4  7.35373815
## 9  3.150 20.450041          NA 3.150 22.8 20.450041 3.150 22.8  5.49995929
## 10 3.440 18.900144          NA 3.440 19.2 18.900144 3.440 19.2  3.73985604
## 11 3.440 18.900144          NA 3.440 17.8 18.900144 3.440 17.8  2.33985604
## 12 4.070 15.533127          NA 4.070 16.4 15.533127 4.070 16.4  4.93687313
## 13 3.730 17.350247          NA 3.730 17.3 17.350247 3.730 17.3  3.67975280
## 14 3.780 17.083024          NA 3.780 15.2 17.083024 3.780 15.2  1.89697638
## 15 5.250  9.226650          NA 5.250 10.4  9.226650 5.250 10.4  6.42334959
## 16 5.424  8.296712          NA 5.424 10.4  8.296712 5.424 10.4  7.52728764
## 17 5.345  8.718926          NA 5.345 14.7  8.718926 5.345 14.7 11.32607439
## 18 2.200 25.527289          NA 2.200 32.4 25.527289 2.200 32.4  9.07271129
## 19 1.615 28.653805          NA 1.615 30.4 28.653805 1.615 30.4  3.36119542
## 20 1.835 27.478021          NA 1.835 33.9 27.478021 1.835 33.9  8.25697917
## 21 2.465 24.111004          NA 2.465 21.5 24.111004 2.465 21.5 -0.14600374
## 22 3.520 18.472586          NA 3.520 15.5 18.472586 3.520 15.5  0.54741377
## 23 3.435 18.926866          NA 3.435 15.2 18.926866 3.435 15.2 -0.29186632
## 24 3.840 16.762355          NA 3.840 13.3 16.762355 3.840 13.3  0.37764467
## 25 3.845 16.735633          NA 3.845 19.2 16.735633 3.845 19.2  6.30936703
## 26 1.935 26.943574          NA 1.935 27.3 26.943574 1.935 27.3  2.29142633
## 27 2.140 25.847957          NA 2.140 26.0 25.847957 2.140 26.0  2.29204300
## 28 1.513 29.198941          NA 1.513 30.4 29.198941 1.513 30.4  2.71405932
## 29 3.170 20.343151          NA 3.170 15.8 20.343151 3.170 15.8 -1.37315128
## 30 2.770 22.480940          NA 2.770 19.7 22.480940 2.770 19.7 -0.01093991
## 31 3.570 18.205363          NA 3.570 15.0 18.205363 3.570 15.0  0.36463735
## 32 2.780 22.427495          NA 2.780 21.4 22.427495 2.780 21.4  1.75250480
setup_smooth <- function(data, params) {
    params$flipped_aes <- has_flipped_aes(data, params, ambiguous = TRUE)
    msg <- character()
    if (is.null(params$method) || identical(params$method, "auto")) {
      # Use loess for small datasets, gam with a cubic regression basis for
      # larger. Based on size of the _largest_ group to avoid bad memory
      # behaviour of loess
      max_group <- max(table(interaction(data$group, data$PANEL, drop = TRUE)))

      if (max_group < 1000) {
        params$method <- "loess"
      } else {
        params$method <- "gam"
      }
      msg <- c(msg, paste0("method = '", params$method, "'"))
    }

    if (is.null(params$formula)) {
      if (identical(params$method, "gam")) {
        params$formula <- y ~ s(x, bs = "cs")
      } else {
        params$formula <- y ~ x
      }
      msg <- c(msg, paste0("formula = '", deparse(params$formula), "'"))
    }
    if (identical(params$method, "gam")) {
      params$method <- gam_method()
    }

    if (length(msg) > 0) {
      cli::cli_inform("{.fn geom_smooth} using {msg}")
    }

    params
  }



#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatSmooth <- ggplot2::ggproto("StatSmooth", ggplot2::Stat,
  setup_params = setup_smooth,
  extra_params = c("na.rm", "orientation"),
  compute_group = compute_group_smooth,
  dropped_aes = c("weight"),
  required_aes = c("x", "y")
)


StatSmooth2 <- StatSmooth

StatSmoothError$compute_group <- compute_group_smooth_error
## Error in StatSmoothError$compute_group <- compute_group_smooth_error: object 'StatSmoothError' not found
StatSmoothError <- ggplot2::ggproto("StatSmoothError", ggplot2::Stat,
  setup_params = setup_smooth,
  extra_params = c("na.rm", "orientation"),
  compute_group = compute_group_smooth_error,
  dropped_aes = c("weight"),
  required_aes = c("x", "y")
)


stat_smooth <- function(mapping = NULL, data = NULL,
                        geom = "smooth", position = "identity",
                        ...,
                        method = NULL,
                        formula = NULL,
                        se = TRUE,
                        n = 80,
                        span = 0.75,
                        fullrange = FALSE,
                        level = 0.95,
                        method.args = list(),
                        na.rm = FALSE,
                        orientation = NA,
                        show.legend = NA,
                        inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = StatSmooth,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = rlang::list2(
      method = method,
      formula = formula,
      se = se,
      n = n,
      fullrange = fullrange,
      level = level,
      na.rm = na.rm,
      orientation = orientation,
      method.args = method.args,
      span = span,
      ...
    )
  )
}

stat_smooth_obs <- function(mapping = NULL, data = NULL,
                        geom = "smooth", position = "identity",
                        ...,
                        method = NULL,
                        formula = NULL,
                        se = TRUE,
                        n = 80,
                        span = 0.75,
                        fullrange = FALSE,
                        level = 0.95,
                        method.args = list(),
                        na.rm = FALSE,
                        orientation = NA,
                        show.legend = NA,
                        inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = StatSmoothError,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = rlang::list2(
      method = method,
      formula = formula,
      se = se,
      n = n,
      fullrange = fullrange,
      level = level,
      na.rm = na.rm,
      orientation = orientation,
      method.args = method.args,
      span = span,
      ...
    )
  )
}

Basically working!

mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  geom_point() +
  geom_smooth(alpha = .2, se = FALSE) 
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

last_plot() +
  stat_smooth_obs(geom = "point", color = "blue") +  # wrap as geom_smooth_fit()
  stat_smooth_obs(geom = "segment") # geom_smooth_error()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

even squared error (best w/ stdz and coord_equal)

stdz <- function(x){
  
  var_mean <- mean(x) 
  var_sd <- sd(x)
  
  (x-var_mean)/var_sd
  
}

last_plot() + 
  stat_smooth_obs(geom = "rect", alpha = .1)  + # geom_smooth_error_sq() +
  aes(stdz(wt), stdz(mpg)) + 
  coord_equal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

mtcars %>% 
  ggplot() +
  aes(wt, mpg) + 
  geom_point() +
  geom_smooth(alpha = .2, se = FALSE, method = lm) + 
  stat_smooth_obs(geom = "point", color = "blue", method = lm) + # wrap as geom_smooth_fit()
  stat_smooth_obs(geom = "segment", method = lm)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

issues