Intro Thoughts

Status Quo

library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.1

Goal

# Goal sketch 1: only y is mapped; the outcome is drawn against row number and
# a model-fit layer is added on top.
# NOTE(review): geom_outcome() and geom_model_fit() are not defined in the
# visible part of this file — presumably the target API, not yet runnable.
ggplot(data = cars) + 
  aes(y = dist) + # x is row number
  geom_outcome() +
  geom_model_fit()

# Goal sketch 2: same mapping, with the model formula spelled out.
# Presumably `.` stands for every other mapped variable — TODO confirm.
ggplot(data = cars) + 
  aes(y = dist) + 
  geom_model_fit(formula = y ~ .)

# Goal sketch 3: speed is additionally mapped to x, so `y ~ .` would
# presumably pick it up as a predictor — TODO confirm.
ggplot(data = cars) + 
  aes(y = dist, x = speed) + 
  geom_model_fit(formula = y ~ .)

Experiment

# Panel computation for the outcome layer: returns the layer data with x set
# to each observation's row position, so an outcome can be plotted with only
# y mapped. `scales` is accepted to match the calling convention but not used.
compute_panel_outcome <- function(data, scales) {
  row_indexed <- mutate(data, x = row_number())
  row_indexed
}

# Prototype of the outcome layer: compute_panel_outcome is handed to
# qstat_panel() (statexpress) and the result is used as geom_point's stat.
# Only y is mapped; x is filled in by compute_panel_outcome as the row number.
library(statexpress)
ggplot(mtcars) +
  aes(y = mpg) + 
  geom_point(stat = qstat_panel(compute_panel_outcome))

# Reference fit made directly with lm(): mpg regressed on disp.
# Its fitted values are printed so they can be compared by eye with what the
# model layer computes.
model <- lm(mpg ~ disp, mtcars)
model[["fitted.values"]]
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##            23.00544            23.00544            25.14862            18.96635 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##            14.76241            20.32645            14.76241            23.55360 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##            23.79677            22.69220            22.69220            18.23272 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##            18.23272            18.23272            10.14632            10.64090 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##            11.46520            26.35622            26.47987            26.66946 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##            24.64992            16.49345            17.07046            15.17456 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##            13.11381            26.34386            24.64168            25.68030 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##            15.13335            23.62366            17.19410            24.61283
# Panel computation for the model-fit layer: fits lm(formula) on the layer
# data, replaces y with the fitted values and sets x to the row number.
#
# data    : layer data; must contain a column y. Every other column, except
#           group, PANEL and exact copies of y, is offered to the model.
# scales  : unused; kept so the signature matches what qstat_panel() is given.
# formula : model formula; the default y ~ . uses all remaining columns.
#
# Returns `data` with y = fitted values (NA for rows lm() could not use
# because of missing values) and x = 1..nrow(data).
compute_panel_model <- function(data, scales, formula = y ~ .) {

  # Columns that are exact copies of y (e.g. the outcome mapped a second time
  # under its own name) would predict y perfectly, so keep them out of the
  # model. Iterating over the columns themselves works for data frames and
  # tibbles alike, unlike data[, i], which stays a tibble for tibble input.
  is_y_copy <- vapply(
    data,
    function(column) identical(column, data[["y"]]),
    logical(1)
  )
  y_copy_names <- setdiff(names(data)[is_y_copy], "y")

  # group and PANEL are ggplot2 bookkeeping, not predictors. Matching by name
  # means columns that are absent are simply ignored.
  excluded <- c("group", "PANEL", y_copy_names)
  model_data <- data[, !(names(data) %in% excluded), drop = FALSE]

  # na.exclude + fitted() pads the fitted values with NA for dropped rows, so
  # the result always lines up with the rows of `data`.
  model <- lm(formula, data = model_data, na.action = na.exclude)
  data$y <- unname(fitted(model))

  data$x <- seq_len(nrow(data))
  data
}

# Degenerate case: the outcome is also the only predictor.
# Per the warnings below, lm() drops the response from the right-hand side,
# leaving no predictor columns in the model matrix.
model <- lm(mpg ~ mpg, mtcars)
## Warning in model.matrix.default(mt, mf, contrasts): the response appeared on
## the right-hand side and was dropped
## Warning in model.matrix.default(mt, mf, contrasts): problem with term 1 in
## model.matrix: no columns are assigned
# Residuals of that fit, printed for inspection.
model$residuals
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##            0.909375            0.909375            2.709375            1.309375 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##           -1.390625           -1.990625           -5.790625            4.309375 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##            2.709375           -0.890625           -2.290625           -3.690625 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##           -2.790625           -4.890625           -9.690625           -9.690625 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##           -5.390625           12.309375           10.309375           13.809375 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##            1.409375           -4.590625           -4.890625           -6.790625 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##           -0.890625            7.209375            5.909375           10.309375 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##           -4.290625           -0.390625           -5.090625            1.309375
# Call compute_panel_model() directly on a hand-built stand-in for layer data:
# y and x are both mpg (so x is an exact copy of the outcome), t is disp, and
# group and PANEL are both taken from column 2 of mtcars.
mtcars |> 
  select(y = mpg, t = disp, x = mpg, group = 2, PANEL = 2) |> 
  compute_panel_model()
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(y_var_names)
## 
##   # Now:
##   data %>% select(all_of(y_var_names))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##                            y     t  x group PANEL
## Mazda RX4           23.00544 160.0  1     6     6
## Mazda RX4 Wag       23.00544 160.0  2     6     6
## Datsun 710          25.14862 108.0  3     4     4
## Hornet 4 Drive      18.96635 258.0  4     6     6
## Hornet Sportabout   14.76241 360.0  5     8     8
## Valiant             20.32645 225.0  6     6     6
## Duster 360          14.76241 360.0  7     8     8
## Merc 240D           23.55360 146.7  8     4     4
## Merc 230            23.79677 140.8  9     4     4
## Merc 280            22.69220 167.6 10     6     6
## Merc 280C           22.69220 167.6 11     6     6
## Merc 450SE          18.23272 275.8 12     8     8
## Merc 450SL          18.23272 275.8 13     8     8
## Merc 450SLC         18.23272 275.8 14     8     8
## Cadillac Fleetwood  10.14632 472.0 15     8     8
## Lincoln Continental 10.64090 460.0 16     8     8
## Chrysler Imperial   11.46520 440.0 17     8     8
## Fiat 128            26.35622  78.7 18     4     4
## Honda Civic         26.47987  75.7 19     4     4
## Toyota Corolla      26.66946  71.1 20     4     4
## Toyota Corona       24.64992 120.1 21     4     4
## Dodge Challenger    16.49345 318.0 22     8     8
## AMC Javelin         17.07046 304.0 23     8     8
## Camaro Z28          15.17456 350.0 24     8     8
## Pontiac Firebird    13.11381 400.0 25     8     8
## Fiat X1-9           26.34386  79.0 26     4     4
## Porsche 914-2       24.64168 120.3 27     4     4
## Lotus Europa        25.68030  95.1 28     4     4
## Ford Pantera L      15.13335 351.0 29     8     8
## Ferrari Dino        23.62366 145.0 30     6     6
## Maserati Bora       17.19410 301.0 31     8     8
## Volvo 142E          24.61283 121.0 32     4     4
# Base plot for the steps that follow: observed mpg drawn against row number
# (x is supplied by compute_panel_outcome).
library(statexpress)
ggplot(mtcars) +
  aes(y = mpg) +
  geom_point(stat = qstat_panel(compute_panel_outcome))

# Each statement below builds on the plot produced by the previous one
# (last_plot()), so the order of these statements matters.

# Map cyl and add the model layer (blue points) computed by
# compute_panel_model. Presumably the mapped variable reaches the layer data
# as a column that the default formula y ~ . can use — TODO confirm.
last_plot() + 
  aes_all("cyl") +
  # geom model
  geom_point(stat = qstat_panel(compute_panel_model), color = "blue") +
  NULL

# Additionally map gear.
last_plot() + 
  aes_all("gear") 

# Map columns 2 to 11 of mtcars.
last_plot() + 
  aes_all(names(mtcars)[2:11]) # the rest but not y

# Map every column of mtcars by name, which includes mpg, the variable
# already mapped to y — the situation compute_panel_model's identical-to-y
# check is written for.
last_plot() + 
  aes_all(names(mtcars)) # uh-oh y predicts y perfectly...

# Same idea in one pipeline: outcome layer, then ggplyr::aes_from_data(),
# then the model layer in blue.
# NOTE(review): aes_from_data() presumably maps every column of the data by
# name (the layer data printed below carries all mtcars columns) — confirm.
library(statexpress)
ggplot(mtcars) +
  aes(y = mpg) +
  # geom_outcome
  geom_point(stat = qstat_panel(compute_panel_outcome)) + 
  ggplyr::aes_from_data() + 
  # geom_lm_predictions
  geom_point(stat = qstat_panel(compute_panel_model), color = "blue") 

# Inspect the data computed for layer 2 (the model layer): y holds the values
# written by compute_panel_model and x the row number.
layer_data(i = 2) |> head()
##    mpg cyl disp  hp drat    wt  qsec vs am gear carb        y PANEL group x
## 1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4 22.59951     1    -1 1
## 2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4 22.11189     1    -1 2
## 3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1 26.25064     1    -1 3
## 4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1 21.23740     1    -1 4
## 5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2 17.69343     1    -1 5
## 6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1 20.38304     1    -1 6
##   shape colour fill size alpha stroke
## 1    19   blue   NA  1.5    NA    0.5
## 2    19   blue   NA  1.5    NA    0.5
## 3    19   blue   NA  1.5    NA    0.5
## 4    19   blue   NA  1.5    NA    0.5
## 5    19   blue   NA  1.5    NA    0.5
## 6    19   blue   NA  1.5    NA    0.5
# Variant with an explicit model formula: formula = y ~ drat + cyl is given to
# the model layer and presumably forwarded to compute_panel_model's `formula`
# argument — TODO confirm how qstat_panel()/geom_point pass it on.
# color = factor(cyl) is mapped at plot level, while the model layer sets a
# fixed color = "blue".
library(statexpress)
ggplot(mtcars) +
  aes(y = mpg, color = factor(cyl)) +
  # geom_outcome
  geom_point(stat = qstat_panel(compute_panel_outcome)) + 
  ggplyr::aes_from_data() + 
  # geom_lm_predictions
  geom_point(stat = qstat_panel(compute_panel_model), 
             color = "blue",
             formula = y ~ drat + cyl)

# Inspect the data computed for layer 2 (the model layer).
layer_data(i = 2) |> head()
##    mpg cyl disp  hp drat    wt  qsec vs am gear carb        y PANEL group x
## 1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4 21.12431     1    -1 1
## 2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4 21.12431     1    -1 2
## 3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1 25.99774     1    -1 3
## 4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1 19.58929     1    -1 4
## 5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2 14.75330     1    -1 5
## 6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1 18.99025     1    -1 6
##   shape colour fill size alpha stroke
## 1    19   blue   NA  1.5    NA    0.5
## 2    19   blue   NA  1.5    NA    0.5
## 3    19   blue   NA  1.5    NA    0.5
## 4    19   blue   NA  1.5    NA    0.5
## 5    19   blue   NA  1.5    NA    0.5
## 6    19   blue   NA  1.5    NA    0.5

Closing remarks, Other Relevant Work, Caveats