Who are the ggplot2 extenders?

# Load the table of exported functions (one row per user / repo / function)
# and derive two prefix columns from the function name:
#   prefix_short - everything up to and including the first underscore
#   prefix_long  - same, except names starting with "scale_" keep one extra
#                  underscore-delimited segment (e.g. "scale_color_")
# Both are NA when the function name contains no underscore.
user_repo_fun <-
  read_csv("https://raw.githubusercontent.com/EvaMaeRey/mytidytuesday/refs/heads/main/2024-11-19-gg-prefixes/exported_funs_exts_ggplot2_tidyverse_org.csv") |>
  mutate(
    prefix_short = str_extract(fun_exported, ".*?_"),
    prefix_long = str_extract(fun_exported, "scale_.*?_|.*?_")
  )

# user_repo_fun$user |> unique() %>% paste0("@", ., " ") |> cat()

# Preview 20 randomly drawn rows, rendered as a table.
user_repo_fun |>
  sample_n(size = 20) |>
  knitr::kable()

user	repo	fun_exported	prefix_short	prefix_long
LCBC-UiO	ggseg	reposition_brain	reposition_	reposition_
hrbrmstr	hrbrthemes	modern_geom_defaults	modern_	modern_
davidgohel	ggiraph	scale_color_gradient2_interactive	scale_	scale_color_
jtlandis	ggside	scale_ycolor_manual	scale_	scale_ycolor_
clauswilke	ggridges	scale_vline_width_continuous	scale_	scale_vline_
davidgohel	ggiraph	interactive_raster_grob	interactive_	interactive_
thomasp85	ggfx	mask_raster	mask_	mask_
piecepackr	piecepackr	has_font	has_	has_
dieghernan	tidyterra	scale_fill_hypso_tint_c	scale_	scale_fill_
ProjectMOSAIC	ggformula	gf_sina	gf_	gf_
terrytangyuan	autoplotly	“%>%”	NA	NA
ProjectMOSAIC	ggformula	gf_pointrange	gf_	gf_
davidgohel	ggiraph	opts_hover	opts_	opts_
shikokuchuo	ichimoku	oanda_studio	oanda_	oanda_
dieghernan	tidyterra	left_join	left_	left_
sachsmc	plotROC	direct_label	direct_	direct_
hrbrmstr	hrbrthemes	ft_text_col	ft_	ft_
davidhodge931	ggblanket	navy	NA	NA
ProjectMOSAIC	ggformula	gf_facet_wrap	gf_	gf_
thomasp85	ggraph	scale_edge_fill_fermenter	scale_	scale_edge_

#' Describe a row filter to apply to a plot's data
#'
#' Captures both arguments unevaluated (as quosures) so they can be replayed
#' later against the plot's data by `ggplot_add.filterobs()`.
#'
#' @param .keep Filtering condition, captured with `rlang::enquo()`.
#' @param .by Optional grouping columns, captured with `rlang::enquo()`.
#' @return A list of class `"filterobs"` holding `keep_specification` and
#'   `by_specification`.
data_filter <- function(.keep, .by) {
  filter_spec <- list(
    keep_specification = rlang::enquo(.keep),
    by_specification = rlang::enquo(.by)
  )
  class(filter_spec) <- "filterobs"
  filter_spec
}

#' Apply a `"filterobs"` specification when it is added to a plot
#'
#' Replaces the plot's data with the rows kept by `dplyr::filter()`, using the
#' condition and grouping stored in `object`.
#'
#' @param object A `"filterobs"` object, as built by `data_filter()`.
#' @param plot The plot whose `data` element is filtered.
#' @param object_name Not used by this method.
#' @return `plot`, with its `data` element replaced by the filtered data.
ggplot_add.filterobs <- function(object, plot, object_name) {
  keep_quo <- object$keep_specification
  by_quo <- object$by_specification

  plot$data <- dplyr::filter(plot$data, !!keep_quo, .by = !!by_quo)

  plot
}

#' Describe a nesting of the plot's data
#'
#' Captures the grouping columns unevaluated so that
#' `ggplot_add.data_nestvar()` can later nest the plot's data by them.
#'
#' @param .by Columns to nest by, captured with `rlang::enquo()`.
#' @return A list of class `"data_nestvar"` holding `by_specification`.
data_nest <- function(.by) {
  nest_spec <- list(by_specification = rlang::enquo(.by))
  class(nest_spec) <- "data_nestvar"
  nest_spec
}

#' Apply a `"data_nestvar"` specification when it is added to a plot
#'
#' Replaces the plot's data with the result of `tidyr::nest()`, nested by the
#' columns stored in `object`.
#'
#' @param object A `"data_nestvar"` object, as built by `data_nest()`.
#' @param plot The plot whose `data` element is nested.
#' @param object_name Not used by this method.
#' @return `plot`, with its `data` element replaced by the nested data.
ggplot_add.data_nestvar <- function(object, plot, object_name) {
  by_quo <- object$by_specification

  plot$data <- tidyr::nest(plot$data, .by = !!by_quo)

  plot
}


#' Describe an unnesting of the plot's data
#'
#' Previously `cols` was accepted but silently discarded and the add-method
#' always unnested a column named "data". The selection is now captured and
#' honoured; calling `data_unnest()` with no argument behaves as before.
#'
#' @param cols Columns to unnest, captured with `rlang::enquo()` and later
#'   handed to `tidyr::unnest()`. Defaults to `"data"`, the column the
#'   add-method previously hard-coded.
#' @return A list of class `"data_unnestvar"` holding `cols_specification`.
data_unnest <- function(cols = "data") {
  structure(list(cols_specification = rlang::enquo(cols)),
            class = "data_unnestvar")
}

#' Apply a `"data_unnestvar"` specification when it is added to a plot
#'
#' Replaces the plot's data with the result of `tidyr::unnest()` on the
#' columns stored in `object`.
#'
#' @param object A `"data_unnestvar"` object, as built by `data_unnest()`.
#' @param plot The plot whose `data` element is unnested.
#' @param object_name Not used by this method.
#' @return `plot`, with its `data` element replaced by the unnested data.
ggplot_add.data_unnestvar <- function(object, plot, object_name) {

  new_data <- tidyr::unnest(plot$data,
                            cols = !!object$cols_specification)

  plot$data <- new_data

  plot

}

The data frame to be plotted is all the exported functions from the ggplot2 extension gallery packages, using ggplot(data = user_repo_fun)

let’s look at a count of all the exported functions first, using aes(id = "All exported functions")

Using circlepacking, we automatically have circle sizes representing the number of observations, i.e. exported functions, using ggcirclepack::geom_circlepack()

Of course this is hard to interpret without some kind of label. We use geom_circlepack_text to do this for us, using ggcirclepack::geom_circlepack_text()

and let’s square up the circles, using coord_equal()

we’ll add a theme, using ggchalkboard:::theme_glassboard()

And remove axes… , using theme(axis.line = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())

First we ask what packages - github repository names - are present, using aes(id = repo)

Then let’s look at who is writing these exported functions, using aes(id = user)

an extender’s an extender no matter how small, using data_nest(.by = user)

shrink sizes, using scale_size(range = 1.75)

extender by number of repos, using data_unnest() + data_nest(.by = c(user, repo))

back to default size scales, using scale_size()

back to record per extender (user), using data_unnest()

And we can look at what types of functions are exported, by looking at prefixes, using aes(id = prefix_short)

Disaggregating a little, to longer prefixes like scale_color, we get a more granular look at exported function types, using aes(id = prefix_long)

and we filter more popular prefixes, using data_filter(n() > 60 & !is.na(prefix_long), .by = prefix_long)

and break up our plot space by these prefixes, using facet_wrap(~prefix_short)

gf_ and add_ aren’t really in-grammar prefixes, using data_filter(!(prefix_short %in% c("gf_","add_")))

let’s look at top prefixes by user, using aes(id = user)

and look at the prolific authors in each of these areas, using data_filter(n() >= 10, .by = c(user, prefix_short))

and show them equally, using data_nest(c(user, prefix_short)) + scale_size(range = 1.7)

  # Step-by-step build of the plot. Each statement below adds one piece to
  # the most recently displayed plot (via last_plot()), so the statements
  # must be run in this order.

  # Start from the table of exported functions.
  ggplot(data = user_repo_fun)

# Count all exported functions together under a single id.
last_plot() +
    aes(id = "All exported functions")

# Draw packed circles; per the narrative, circle size reflects the number
# of observations (exported functions).
last_plot() +
    ggcirclepack::geom_circlepack()

# Label the circles.
last_plot() +
    ggcirclepack::geom_circlepack_text()

# Square up the circles.
last_plot() +
    coord_equal()

# Add a theme.
last_plot() +
    ggchalkboard:::theme_glassboard()

# Remove the axis lines, text and ticks.
last_plot() +
    theme(axis.line = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())

# One circle per package (GitHub repository name).
last_plot() +
    aes(id = repo)

# One circle per author (GitHub user).
last_plot() +
    aes(id = user)

# Nest the data so there is one row per user.
last_plot() +
    data_nest(.by = user)

# Shrink sizes.
last_plot() +
    scale_size(range = 1.75)

# Undo the nesting, then nest to one row per user/repo pair
# (extenders by number of repos).
last_plot() +
    data_unnest() + data_nest(.by = c(user, repo))

# Back to the default size scale.
last_plot() +
    scale_size()

# Undo the nesting again.
last_plot() +
    data_unnest()

# Look at function types via the short prefix.
last_plot() +
    aes(id = prefix_short)

# More granular view via the long prefix (e.g. scale_color_).
last_plot() +
    aes(id = prefix_long)

# Keep only long prefixes that occur more than 60 times and are not NA.
last_plot() +
    data_filter(n() > 60 & !is.na(prefix_long), .by = prefix_long)

# One panel per short prefix.
last_plot() +
    facet_wrap(~prefix_short)

# Drop the gf_ and add_ prefixes.
last_plot() +
    data_filter(!(prefix_short %in% c("gf_","add_")))

# Within each prefix panel, one circle per user.
last_plot() +
    aes(id = user)

# Keep user/prefix combinations with at least 10 exported functions.
last_plot() +
    data_filter(n() >= 10, .by = c(user, prefix_short))

# Nest to one row per user/prefix pair and shrink sizes so they show equally.
last_plot() +
    data_nest(c(user, prefix_short)) + scale_size(range = 1.7)

# Inspect the data carried by the final plot. A ggplot object stores it in
# `$data`; it has no `plot_data` element, so `$plot_data` only yields NULL.
last_plot()$data

NULL

Here is the complete ‘conversation’ with the dataset!

# The same sequence of steps as above, written as one expression. Layers are
# applied in order: each data_nest()/data_unnest()/data_filter() step rewrites
# the plot's data, and a later aes(id = ...) overrides an earlier one.
ggplot(data = user_repo_fun) + # the data frame to be plotted is all the exported functions (source table read into user_repo_fun)
  aes(id = "All exported functions") + # let's look at a count of all the exported functions first
  ggcirclepack::geom_circlepack() + # Using circlepacking, circle sizes automatically represent the number of observations, i.e. exported functions
  ggcirclepack::geom_circlepack_text() + # Of course this is hard to interpret without some kind of label.  We use geom_circlepack_text to do this for us
  coord_equal() + # and let's square up the circles
  ggchalkboard:::theme_glassboard() + # we'll add a theme
  theme(axis.line = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) + # And remove axes
  aes(id = repo) + # First we ask what packages - github repository names - are present
  aes(id = user) + # Then let's look at who is writing these exported functions
  data_nest(.by = user) + # an extender's an extender no matter how small
  scale_size(range = 1.75) + # shrink sizes
  data_unnest() + data_nest(.by = c(user, repo)) + # extender by number of repos
  scale_size() + # back to default size scales
  data_unnest() + # undo the nesting: back to one row per exported function
  aes(id = prefix_short) + # And we can look at what types of functions are exported, by looking at prefixes
  aes(id = prefix_long) + # Disaggregating a little, to longer prefixes like scale_color_, we get a more granular look at exported function types
  data_filter(n() > 60 & !is.na(prefix_long), .by = prefix_long) + # and we keep only the more popular prefixes
  facet_wrap(~prefix_short) + # and break up our plot space by these prefixes
  data_filter(!(prefix_short %in% c("gf_","add_"))) + # gf_ and add_ aren't really in-grammar prefixes
  aes(id = user) + # let's look at top prefixes by user
  data_filter(n() >= 10, .by = c(user, prefix_short)) + # and look at the prolific authors in each of these areas
  data_nest(c(user, prefix_short)) + scale_size(range = 1.7) # and show them equally