Adapted from Campitelli (2018), which creates stat_rasa; extended here into stat_group(), stat_panel() and stat_panel_sf().

https://eliocamp.github.io/codigo-r/en/2018/05/how-to-make-a-generic-stat-in-ggplot2/

compute_rasa()

# Shared compute method for the Rasa stats.
#
# Overrides the default arguments of `compute_fun` with the matching entries
# of `fun.args`, then calls it on `data`.
#
# data        The data handed to the stat (passed as the only call argument).
# scales      Unused; present because ggplot2 compute methods receive it.
# compute_fun Function applied to `data`.
# fun.args    List (or pairlist) of candidate overrides. Only named entries
#             whose name is an argument of `compute_fun` are used; anything
#             else (e.g. geom parameters such as `size`) is ignored.
#
# Returns whatever `compute_fun(data)` returns.
compute_rasa <- function(data, scales, compute_fun, fun.args) {
  defaults <- as.list(formals(compute_fun))
  known <- names(defaults)
  candidate_names <- names(fun.args)

  for (i in seq_along(fun.args)) {
    nm <- candidate_names[i]
    # Skip unnamed entries and names compute_fun does not declare
    if (is.null(nm) || is.na(nm) || !nzchar(nm) || !(nm %in% known)) {
      next
    }
    # list() wrapper keeps the formal when the override itself is NULL
    defaults[nm] <- list(fun.args[[i]])
  }
  formals(compute_fun) <- defaults

  # Apply function to data
  compute_fun(data)
}

then define StatRasagroup and stat_group()

# ggproto Stat whose per-group computation is delegated to compute_rasa()
StatRasagroup <- ggplot2::ggproto(
  "StatRasagroup",
  ggplot2::Stat,
  compute_group = compute_rasa
)

# Layer constructor that applies a user-supplied function group by group.
#
# The argument order is deliberately unconventional: `compute_fun` and `geom`
# come first so short wrappers can supply them positionally.
#
# compute_fun   Function called on the group's data (see compute_rasa()).
# geom, mapping, data, position, show.legend, inherit.aes
#               Passed to ggplot2::layer().
# required_aes, default_aes, dropped_aes
#               Optional overrides for the stat fields of the same name;
#               NULL keeps the values of StatRasagroup.
# ...           Captured (unevaluated) as `fun.args` for `compute_fun`, and
#               also forwarded as layer parameters.
#
# Returns the object produced by ggplot2::layer().
stat_group <- function(compute_fun = NULL,
                       geom = "point",
                       mapping = NULL,
                       data = NULL,
                       position = "identity",
                       required_aes = NULL,
                       default_aes = NULL,
                       dropped_aes = NULL,
                       ...,
                       show.legend = NA,
                       inherit.aes = TRUE) {
  # Check arguments
  if (!is.function(compute_fun)) {
    stop("compute_fun must be a function")
  }

  # Pass dotted arguments to a list
  fun.args <- match.call(expand.dots = FALSE)$`...`

  # ggproto objects have reference semantics: assigning into StatRasagroup
  # itself would change every other layer built on it. Work on a per-call
  # child instead.
  layer_stat <- ggplot2::ggproto(NULL, StatRasagroup)
  if (!is.null(required_aes)) {
    layer_stat$required_aes <- required_aes
  }
  if (!is.null(default_aes)) {
    layer_stat$default_aes <- default_aes
  }
  if (!is.null(dropped_aes)) {
    layer_stat$dropped_aes <- dropped_aes
  }

  ggplot2::layer(
    data = data,
    mapping = mapping,
    stat = layer_stat,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    check.aes = FALSE,
    check.param = FALSE,
    params = list(
      compute_fun = compute_fun,
      fun.args = fun.args,
      na.rm = FALSE,
      ...
    )
  )
}

define StatRasapanel and stat_panel

library(tidyverse)


# ggproto Stat whose per-panel computation is delegated to compute_rasa()
StatRasapanel <- ggplot2::ggproto(
  `_class` = "StatRasapanel",
  `_inherit` = ggplot2::Stat,
  compute_panel = compute_rasa
)


# Layer constructor that applies a user-supplied function panel by panel.
#
# The argument order is deliberately unconventional: `compute_fun` and `geom`
# come first so short wrappers can supply them positionally.
#
# compute_fun   Function called on the panel's data (see compute_rasa()).
# geom, mapping, data, position, show.legend, inherit.aes
#               Passed to ggplot2::layer().
# required_aes, default_aes, dropped_aes
#               Optional overrides for the stat fields of the same name;
#               NULL keeps the values of StatRasapanel.
# ...           Captured (unevaluated) as `fun.args` for `compute_fun`, and
#               also forwarded as layer parameters.
#
# Returns the object produced by ggplot2::layer().
stat_panel <- function(compute_fun = NULL,
                       geom = "point",
                       mapping = NULL, data = NULL,
                       position = "identity",
                       required_aes = NULL,
                       default_aes = NULL,
                       dropped_aes = NULL,
                       ...,
                       show.legend = NA,
                       inherit.aes = TRUE) {
  # Check arguments
  if (!is.function(compute_fun)) {
    stop("compute_fun must be a function")
  }

  # Pass dotted arguments to a list
  fun.args <- match.call(expand.dots = FALSE)$`...`

  # ggproto objects have reference semantics: assigning into StatRasapanel
  # itself would change every other layer built on it. Work on a per-call
  # child instead.
  layer_stat <- ggplot2::ggproto(NULL, StatRasapanel)
  if (!is.null(required_aes)) {
    layer_stat$required_aes <- required_aes
  }
  if (!is.null(default_aes)) {
    layer_stat$default_aes <- default_aes
  }
  if (!is.null(dropped_aes)) {
    layer_stat$dropped_aes <- dropped_aes
  }

  ggplot2::layer(
    data = data,
    mapping = mapping,
    stat = layer_stat,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    check.aes = FALSE,
    check.param = FALSE,
    params = list(
      compute_fun = compute_fun,
      fun.args = fun.args,
      na.rm = FALSE,
      ...
    )
  )
}

define stat_panel_sf()

# Panel-level sf layer plus a matching coord_sf().
#
# The argument order is deliberately unconventional: the reference data,
# `compute_fun` and `geom` come first.
#
# geo_ref_data  Reference data frame. When given without `compute_fun`, a
#               default compute function is built that inner-joins the layer
#               data to it (or, with `stamp = TRUE`, returns it as is) and
#               optionally filters rows by `keep_id` / `drop_id`.
#               NOTE(review): the default filter reads a column named
#               `id_col`, presumably added when the reference data was
#               prepared - confirm against the preparation step.
# compute_fun   Function called on the panel's data; overrides the default.
# geom, mapping, data, position, show.legend, inherit.aes
#               Passed to ggplot2::layer_sf().
# required_aes, default_aes, dropped_aes
#               Optional overrides for the stat fields of the same name;
#               NULL keeps the values of StatRasapanel.
# ...           Captured (unevaluated) as `fun.args` for `compute_fun`, and
#               also forwarded as layer parameters.
# na.rm         Forwarded as a layer parameter.
# crs           Used for coord_sf()'s `crs`, `default_crs` (through
#               sf::st_crs()) and `datum`,
#               e.g. "NAD27", 5070, "WGS84", "NAD83", 4326, 3857.
#
# Returns a two-element list: the sf layer and the coord.
stat_panel_sf <- function(geo_ref_data = NULL,
                          compute_fun = NULL,
                          geom = "sf",
                          mapping = NULL,
                          data = NULL,
                          position = "identity",
                          required_aes = NULL,
                          default_aes = NULL,
                          dropped_aes = NULL,
                          ...,
                          show.legend = NA,
                          inherit.aes = TRUE,
                          na.rm = FALSE,
                          crs = "NAD27" # "NAD27", 5070, "WGS84", "NAD83", 4326 , 3857
) {

  # Scalar condition: use the short-circuiting operator
  if (!is.null(geo_ref_data) && is.null(compute_fun)) {

    compute_fun <- function(data, keep_id = NULL,
                            drop_id = NULL,
                            stamp = FALSE) {

      if (!stamp) {
        data <- dplyr::inner_join(data, geo_ref_data)
      }
      if (stamp) {
        data <- geo_ref_data
      }

      if (!is.null(keep_id)) {
        data <- dplyr::filter(data, id_col %in% keep_id)
      }
      if (!is.null(drop_id)) {
        data <- dplyr::filter(data, !(id_col %in% drop_id))
      }

      data
    }
  }

  # Check arguments
  if (!is.function(compute_fun)) {
    stop("compute_fun must be a function")
  }

  # Pass dotted arguments to a list
  fun.args <- match.call(expand.dots = FALSE)$`...`

  # ggproto objects have reference semantics: assigning into StatRasapanel
  # itself would change every other layer built on it. Work on a per-call
  # child instead.
  layer_stat <- ggplot2::ggproto(NULL, StatRasapanel)
  if (!is.null(required_aes)) {
    layer_stat$required_aes <- required_aes
  }
  if (!is.null(default_aes)) {
    layer_stat$default_aes <- default_aes
  }
  if (!is.null(dropped_aes)) {
    layer_stat$dropped_aes <- dropped_aes
  }

  c(
    ggplot2::layer_sf(
      data = data,
      mapping = mapping,
      stat = layer_stat,
      geom = geom,
      position = position,
      show.legend = show.legend,
      inherit.aes = inherit.aes,
      check.aes = FALSE,
      check.param = FALSE,
      params = rlang::list2(
        compute_fun = compute_fun,
        fun.args = fun.args,
        na.rm = na.rm, ...
      )
    ),

    # Namespaced so the function does not depend on ggplot2 being attached
    ggplot2::coord_sf(
      crs = crs,
      default_crs = sf::st_crs(crs),
      datum = crs,
      default = TRUE
    )
  )

}

stat_group(fun = summarize_xy)

# Collapse the supplied data to one row: the mean of x and the mean of y.
group_means <- function(data) {
  summarise(
    data,
    x = mean(x),
    y = mean(y)
  )
}

# Layer built on stat_group() with group_means() as the compute function;
# all arguments are forwarded to stat_group().
geom_means <- function(...) {
  stat_group(
    compute_fun = group_means,
    ...
  )
}

# Scatterplot of wt vs mpg with a large point from geom_means() on top
ggplot(mtcars) +
  aes(x = wt, y = mpg) +
  geom_point() +
  geom_means(size = 6)

# Same plot with colour mapped to cylinder count
last_plot() +
  aes(color = factor(cyl))

# One row per distinct `label`, positioned at the NA-ignoring mean of x and y.
# Expects `data` to have columns `label`, `x` and `y`.
group_label_at_center <- function(data) {
  data %>%
    group_by(label) %>%
    # TRUE rather than T: T is an ordinary variable and can be reassigned
    summarise(
      x = mean(x, na.rm = TRUE),
      y = mean(y, na.rm = TRUE)
    )
}

# Layer drawing GeomLabel at the positions returned by
# group_label_at_center(); all arguments are forwarded to stat_group().
geom_center_label <- function(...) {
  stat_group(
    compute_fun = group_label_at_center,
    geom = GeomLabel,
    ...
  )
}


# Penguin bill scatterplot with a single constant label, "All"
ggplot(palmerpenguins::penguins) +
  aes(x = bill_length_mm, y = bill_depth_mm) +
  geom_point() +
  aes(label = "All") +
  geom_center_label()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Same plot with colour and label both mapped to species
last_plot() +
  aes(color = species, label = species)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Layer drawing GeomText at the positions returned by
# group_label_at_center(); all arguments are forwarded to stat_group().
geom_center_text <- function(...) {
  stat_group(
    compute_fun = group_label_at_center,
    geom = GeomText,
    ...
  )
}


# Points coloured by species, with bold black species names from
# geom_center_text()
ggplot(palmerpenguins::penguins) +
  aes(x = bill_length_mm, y = bill_depth_mm) +
  geom_point(x = 42, aes(color = bill_length_mm)) +
  aes(color = species) +
  geom_center_text(
    color = "Black",
    aes(label = species),
    alpha = .8,
    size = 5,
    fontface = "bold"
  )
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Inspect the data computed for the second layer (the text layer)
layer_data(i = 2)
##       label        x        y PANEL group colour size angle hjust vjust alpha
## 1    Adelie 38.79139 18.34636     1    -1  Black    5     0   0.5   0.5   0.8
## 2 Chinstrap 48.83382 18.42059     1    -1  Black    5     0   0.5   0.5   0.8
## 3    Gentoo 47.50488 14.98211     1    -1  Black    5     0   0.5   0.5   0.8
##   family fontface lineheight
## 1            bold        1.2
## 2            bold        1.2
## 3            bold        1.2
# Add segment end points: each row gets xend equal to its x and yend of 0.
compute_post <- function(data) {
  mutate(
    data,
    xend = x,
    yend = 0
  )
}

# Segment layer using compute_post(); all arguments are forwarded to
# stat_group().
geom_post <- function(...) {
  stat_group(
    compute_fun = compute_post,
    geom = "segment",
    ...
  )
}


# Two-outcome probability plot: posts from geom_post() with points on top
ggplot(data.frame(outcome = 0:1, prob = c(.4, .6))) +
  aes(x = outcome, y = prob) +
  geom_post() +
  geom_point() +
  labs(title = "probability by outcome")

# Reduce the supplied data to a single row with xintercept = mean(x).
compute_xmean <- function(data) {
  summarize(data, xintercept = mean(x))
}

# Vertical-line layer using compute_xmean(). x and y are declared as dropped
# aesthetics because the computed data only contains xintercept.
geom_xmean <- function(...) {
  stat_group(
    compute_fun = compute_xmean,
    geom = "vline",
    dropped_aes = c("x", "y"),
    ...
  )
}

# Scatterplot of wt vs mpg with a dashed geom_xmean() line
ggplot(mtcars) +
  aes(x = wt, y = mpg) +
  geom_point() +
  geom_xmean(linetype = "dashed")

# Same plot with colour mapped to cylinder count
last_plot() +
  aes(color = factor(cyl))

# Summarise the supplied data to the q-th quantile of x and of y
# (q defaults to .25).
compute_xy_quantile <- function(data, q = .25) {
  summarise(
    data,
    x = quantile(x, q),
    y = quantile(y, q)
  )
}

# Point layer using compute_xy_quantile(); all arguments are forwarded to
# stat_group(). Note: this name masks ggplot2::geom_quantile() when ggplot2
# is attached.
geom_quantile <- function(...) {
  stat_group(
    compute_fun = compute_xy_quantile,
    geom = "point",
    ...
  )
}

# Scatterplot with two quantile markers: the default q and q = .75
ggplot(mtcars) +
  aes(x = wt, y = mpg) +
  geom_point() +
  geom_quantile(size = 8) +
  geom_quantile(size = 8, q = .75)

Stat Rasa one-liners

Since the arguments are organized with the compute function in first position and the geom in second position, we can write one-liners (or two-liners) that supply both positionally.

geom_xmean_line() in 137 characters

library(tidyverse)
# Vertical-line layer at mean(x), computed by stat_group(); x and y are
# declared dropped because the summary returns only xintercept.
geom_xmean_line <- function(...){stat_group(function(df) df |> summarize(xintercept = mean(x)), "vline", dropped_aes = c("x", "y"), ...)}


# Speed vs stopping distance with a dashed geom_xmean_line()
cars |>
  ggplot() +
  aes(x = speed, y = dist) +
  geom_point() +
  geom_xmean_line(linetype = "dashed")

# Same plot with colour mapped to whether dist exceeds 50
last_plot() +
  aes(color = dist > 50)

# Point layer at x = mean(x) with y fixed to I(.05); redefines the earlier geom_xmean().
geom_xmean <- function(...){stat_group(\(df) summarize(df, x = mean(x), y = I(.05)), "point", ...)}


# Speed vs stopping distance with a diamond marker from geom_xmean()
cars |>
  ggplot() +
  aes(x = speed, y = dist) +
  geom_point() +
  geom_xmean(size = 8, shape = "diamond")

geom_post() in 101 characters

# Segment layer: each row gets xend = x and yend = 0 (one-line redefinition of geom_post()).
geom_post <- function(...){stat_group(function(df) df |> mutate(xend = x, yend = 0), "segment", ...)}

# Two-outcome probability plot: posts with points on top
ggplot(data.frame(prob = c(.4, .6), outcome = c(0, 1))) +
  aes(x = outcome, y = prob) +
  geom_post() +
  geom_point()

# Point layer at x = sum(x * y), y = 0.
geom_expectedvalue <- function(...){stat_group(\(df) summarise(df, x = sum(x*y), y = 0), "point", ...)}

# Add the expected-value point to the previous plot
last_plot() + geom_expectedvalue()

# Text layer at x = sum(x * y), y = 0, labelled with x rounded to 2 digits.
geom_expectedvalue_label <- function(...){stat_group(\(df) mutate(summarise(df, x = sum(x*y), y = 0), label = round(x, 2)), "text", hjust = 1, vjust = 0, ...)}

# Add the expected-value label to the previous plot
last_plot() + geom_expectedvalue_label()

geom_proportion() and geom_proportion_label()

# Dot plot of sixteen outcomes: fifteen 1s followed by a single 0
ggplot(data.frame(outcome = c(rep(1, 15), 0))) +
  aes(x = outcome) +
  geom_dotplot()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Panel-level point layer at x = sum(x)/length(x), y = 0.
geom_proportion <- function(...){stat_panel(\(df) summarise(df, x = sum(x)/length(x), y = 0), "point", ...)}   # this should work for T/F too when rasa_p is in play

# Add the proportion point to the previous plot
last_plot() + geom_proportion()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Panel-level text layer at x = sum(x)/length(x), y = 0, labelled with x rounded to 2 digits.
geom_proportion_label <- function(...){stat_panel(\(df) mutate(summarise(df, x = sum(x)/length(x), y = 0), label = round(x,2)), "text", vjust = 0, ...)}   # this should work for T/F too when rasa_p is in play

# Add the proportion label to the previous plot
last_plot() + geom_proportion_label()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# last_plot() + 
#   geom_proportion_label() + 
#   ggsample::facet_bootstrap()
# 
# layer_data(i = 2)
# 
# 
# 
# rep(0:1, 10000) %>% # very large 50/50 sample
#   data.frame(outcome = .) |>
#   ggplot() + 
#   aes(x = outcome) + 
#   geom_dotplot() +
#   geom_proportion() + 
#   geom_proportion_label() + 
#   ggsample::facet_sample(n_facets = 25,
#     n_sampled = 16) ->
# p; p 
#   
# 
# layer_data(p, i = 2) |>
#   ggplot() + 
#   aes(x = x) + 
#   geom_rug() + 
#   geom_histogram()

geom_means in 131 characters

# Point layer at the NA-ignoring means of x and y (one-line redefinition of geom_means()).
geom_means <- function(...){stat_group(function(df) df |> summarize(x = mean(x, na.rm = T), y = mean(y, na.rm = T)), "point", ...)}

# Penguin bill scatterplot with a large geom_means() point
ggplot(palmerpenguins::penguins) +
  aes(x = bill_length_mm, y = bill_depth_mm) +
  geom_point() +
  geom_means(size = 7)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Add a second means layer coloured by whether bill depth exceeds 17
last_plot() +
  geom_means(size = 5, aes(color = bill_depth_mm > 17))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## geom_grouplabel_at_means()

# Label layer: one label per distinct `label` value, placed at the NA-ignoring means of x and y.
# Uses TRUE rather than T, since T is an ordinary variable that can be reassigned.
geom_grouplabel_at_means <-  function(...){stat_group(function(df) df |> group_by(label) |> summarize(x = mean(x, na.rm = TRUE), y = mean(y, na.rm = TRUE)), "label", ...)}

# Penguin bill scatterplot, grouped and labelled by species
ggplot(palmerpenguins::penguins) +
  aes(
    x = bill_length_mm,
    y = bill_depth_mm,
    label = species,
    group = species
  ) +
  geom_point() +
  geom_grouplabel_at_means(size = 7)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

compute_panel_highlight_lines()

# Flag the rows whose `id` is in `which_id` (column `ind_id`), sort so flagged
# rows come last, then set `group` to `id` as a factor in order of appearance.
compute_panel_highlight_lines <- function(data, which_id = NULL) {
  flagged <- mutate(data, ind_id = id %in% which_id)
  sorted_rows <- arrange(flagged, ind_id)
  mutate(sorted_rows, group = fct_inorder(id))
}

# Line layer using compute_panel_highlight_lines(), with colour defaulting to
# the computed `ind_id` flag. The compute function must be passed as
# `compute_fun`, the argument name stat_panel() declares; under any other
# name it falls into `...` and stat_panel() stops with
# "compute_fun must be a function".
geom_line_highlight <- function(...) {
  stat_panel(
    compute_fun = compute_panel_highlight_lines,
    geom = "line",
    default_aes = aes(color = after_stat(ind_id)),
    ...
  )
}

gapminder::gapminder %>% 
  filter(continent == "Americas") %>% 
  ggplot() + 
  aes(x = year, y = lifeExp, id = country) + 
  geom_point() + 
  geom_line_highlight(which_id = "Bolivia",
                      linewidth = 3)

Then using stat_panel_sf

# Read the North Carolina counties shapefile that ships with the sf package
nc <- sf::st_read(
  system.file("shape/nc.shp", package = "sf")
)
## Reading layer `nc' from data source 
##   `/Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library/sf/shape/nc.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
## Geodetic CRS:  NAD27
# Keep county name and FIPS code, then prepare the reference data with
# county_name as the id column
geo_reference_northcarolina_county <- sf2stat:::sf_df_prep_for_stat(
  dplyr::select(nc, county_name = NAME, fips = FIPS),
  id_col_name = "county_name"
)
## Warning in st_point_on_surface.sfc(sf::st_zm(dplyr::pull(sf_df, geometry))):
## st_point_on_surface may not give correct results for longitude/latitude data
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## ℹ The deprecated feature was likely used in the sf2stat package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# failing with default aes... :-(

# stat_panel_sf() with the North Carolina county reference data supplied as
# geo_ref_data; all other arguments are forwarded.
stat_nc_counties <- function(...) {
  stat_panel_sf(
    geo_ref_data = geo_reference_northcarolina_county,
    ...
  )
}

# Start from the reference table without its geometry, map fips, then draw
# the county layer and a text layer labelled with the computed id_col
ggplot(sf::st_drop_geometry(geo_reference_northcarolina_county)) +
  aes(fips = fips) +
  stat_nc_counties() +
  stat_nc_counties(
    geom = "text",
    mapping = aes(label = after_stat(id_col))
  )
## Joining with `by = join_by(fips)`
## Joining with `by = join_by(fips)`