Intro Thoughts

Status Quo

library(tidyverse)

# Load the shiny_revdeps table (columns: child, dependency_type, parent) from
# the TidyTuesday 2024-04-16 data folder.
shiny_revdeps <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-16/shiny_revdeps.csv"
)
## Rows: 146135 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): child, dependency_type, parent
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the package_details table from the TidyTuesday 2024-04-16 data folder
# (18,474 rows x 63 columns when this document was first rendered).
#
# guess_max = Inf makes readr look at every row before choosing column types.
# With the default (the first 1,000 rows) this read raised a "parsing issues"
# warning and typed 11 columns -- including Priority and OS_type -- as logical;
# presumably those columns are empty near the top of the file and hold text
# further down, so the later values failed to parse.
# show_col_types = FALSE quiets the column-specification message.
package_details <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-16/package_details.csv",
  guess_max = Inf,
  show_col_types = FALSE
)
library(ggraph)
# package_details %>% 
#   # head() %>% 
#   select(Package, Author)  %>% 
#   mutate(Author = str_split(Author, ",\n ")) %>% 
#   unnest(Author) %>% 
#   mutate(Author = str_remove_all(Author, "\\[.+")) %>%
#   mutate(Author = str_remove_all(Author, "\\<.+")) %>% 
#   mutate(Author = str_trim(Author)) %>% 
#   group_by(Author) %>% 
#   filter(n() > 30) %>% 
#   head(300) %>% 
#   ggedgelist:::ggedgelist_quick(layout = "fr", include_names = T)   


# Build a table of distinct author names plus a guessed first name:
#   1. split each package's Author field on ",\n " into one row per author,
#   2. drop everything from the first "[" or "<" onward and trim whitespace,
#   3. keep each distinct Author once,
#   4. take the first run of word characters as first_name.
author_df <- package_details %>%
  select(Package, Author) %>%
  mutate(Author = str_split(Author, ",\n ")) %>%
  unnest(Author) %>%
  mutate(
    Author = str_remove_all(Author, "\\[.+"),
    Author = str_remove_all(Author, "\\<.+"),
    Author = str_trim(Author)
  ) %>%
  distinct(Author) %>%
  mutate(first_name = str_extract(Author, "\\w+"))
  
# author_df %>% 
#   pull(first_name) %>%
#   gender::gender() %>% 
#   distinct() %>% arrange(proportion_male)
#   rename(first_name = name) %>% 
#   left_join(author_df) %>% 
#   ggplot() + 
#   aes(x = proportion_male) + 
#   geom_histogram()
# Histogram of gender::gender()'s proportion_male for the first word of each
# package's Maintainer field.
# fill is mapped to the raw variable here; stat_bin() drops that aesthetic with
# a warning, which the following steps investigate.
package_details %>%
  pull(Maintainer) %>%
  str_extract("\\w+") %>%
  gender::gender() %>%
  ggplot() +
  aes(x = proportion_male, fill = proportion_male) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

# Print geom_histogram()'s definition to see which stat and geom it wires up.
geom_histogram
## function (mapping = NULL, data = NULL, stat = "bin", position = "stack", 
##     ..., binwidth = NULL, bins = NULL, na.rm = FALSE, orientation = NA, 
##     show.legend = NA, inherit.aes = TRUE) 
## {
##     layer(data = data, mapping = mapping, stat = stat, geom = GeomBar, 
##         position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
##         params = list2(binwidth = binwidth, bins = bins, na.rm = na.rm, 
##             orientation = orientation, pad = FALSE, ...))
## }
## <bytecode: 0x7fe5508905f0>
## <environment: namespace:ggplot2>
# Show the default aesthetics of GeomBar, the geom that geom_histogram() uses.
GeomBar$default_aes
## Aesthetic mapping: 
## * `colour`    -> NA
## * `fill`      -> "grey35"
## * `linewidth` -> 0.5
## * `linetype`  -> 1
## * `alpha`     -> NA
# Which variables does the stat create? Inspect the data computed by the stat
# for the last plot to see what is available after the transformation.
ggtrace::layer_after_stat() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## ✔ Executed `ggtrace_inspect_return(last_plot(), ggplot2:::Layer$compute_statistic)`
## # A tibble: 30 × 11
##    count      x    xmin   xmax  width density  ncount ndensity flipped_aes PANEL
##    <dbl>  <dbl>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl> <lgl>       <fct>
##  1  1439 0      -0.0172 0.0172 0.0345  2.78   0.128    0.128   FALSE       1    
##  2    88 0.0345  0.0172 0.0517 0.0345  0.170  0.00785  0.00785 FALSE       1    
##  3    87 0.0690  0.0517 0.0862 0.0345  0.168  0.00776  0.00776 FALSE       1    
##  4    80 0.103   0.0862 0.121  0.0345  0.154  0.00713  0.00713 FALSE       1    
##  5   131 0.138   0.121  0.155  0.0345  0.253  0.0117   0.0117  FALSE       1    
##  6    38 0.172   0.155  0.190  0.0345  0.0733 0.00339  0.00339 FALSE       1    
##  7    18 0.207   0.190  0.224  0.0345  0.0347 0.00161  0.00161 FALSE       1    
##  8    44 0.241   0.224  0.259  0.0345  0.0849 0.00392  0.00392 FALSE       1    
##  9   129 0.276   0.259  0.293  0.0345  0.249  0.0115   0.0115  FALSE       1    
## 10    22 0.310   0.293  0.328  0.0345  0.0424 0.00196  0.00196 FALSE       1    
## # ℹ 20 more rows
## # ℹ 1 more variable: group <int>
# Re-map fill to x as computed by stat_bin() (after_stat(x)) so the aesthetic
# is no longer dropped by the statistical transformation, then add a
# three-colour gradient, the fulcrum layer, and the labels.
last_plot() +
  aes(fill = after_stat(x)) +
  scale_fill_gradientn(colors = c("blue", "grey", "orange")) +
  ma206distributions:::geom_fulcrum() +
  labs(
    fill = "Proportion",
    title = "A look at the Shiny-dependent packages' maintainers",
    subtitle = "For the 18,474 packages, are maintainers' first names usually given to male or female children?"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Experiment

Closing remarks, Other Relevant Work, Caveats