Status Quo
library(tidyverse)
# TidyTuesday 2024-04-16: edge list with columns child, dependency_type and
# parent (see the column specification readr prints on load).
shiny_revdeps <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-16/shiny_revdeps.csv"
)
## Rows: 146135 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): child, dependency_type, parent
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# TidyTuesday 2024-04-16: one row of package metadata per package
# (Package, Version, Author, Maintainer, ...).
# NOTE: readr reports parsing issues for this file; run
# `problems(package_details)` to see which cells are affected.
package_details <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-04-16/package_details.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 18474 Columns: 63
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (51): Package, Version, Depends, Imports, LinkingTo, Suggests, Enhances...
## lgl (11): Priority, License_is_FOSS, License_restricts_use, OS_type, BuildK...
## date (1): Published
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(ggraph)
# package_details %>%
# # head() %>%
# select(Package, Author) %>%
# mutate(Author = str_split(Author, ",\n ")) %>%
# unnest(Author) %>%
# mutate(Author = str_remove_all(Author, "\\[.+")) %>%
# mutate(Author = str_remove_all(Author, "\\<.+")) %>%
# mutate(Author = str_trim(Author)) %>%
# group_by(Author) %>%
# filter(n() > 30) %>%
# head(300) %>%
# ggedgelist:::ggedgelist_quick(layout = "fr", include_names = T)
# Build a table of distinct author names plus the first word of each name.
#
# Steps:
#   1. split the free-text Author field on ",\n " so each author gets a row;
#   2. drop everything from the first "[" on (presumably role tags such as
#      "[aut, cre]" -- confirm against the data);
#   3. drop everything from the first "<" on (presumably e-mail / ORCID
#      suffixes -- confirm against the data);
#   4. trim whitespace, de-duplicate, and take the first run of word
#      characters as `first_name`.
author_df <- package_details %>%
  select(Package, Author) %>%
  mutate(Author = str_split(Author, ",\n ")) %>%
  unnest(Author) %>%
  mutate(
    # mutate() evaluates these in order, so each step sees the previous result
    Author = str_remove_all(Author, "\\[.+"),
    Author = str_remove_all(Author, "\\<.+"),
    Author = str_trim(Author)
  ) %>%
  distinct(Author) %>%
  mutate(first_name = str_extract(Author, "\\w+"))
# author_df %>%
# pull(first_name) %>%
# gender::gender() %>%
# distinct() %>% arrange(proportion_male)
# rename(first_name = name) %>%
# left_join(author_df) %>%
# ggplot() +
# aes(x = proportion_male) +
# geom_histogram()
# Status-quo attempt: histogram of `proportion_male` for maintainers' first
# names, with fill mapped to the raw variable.
# The fill mapping is dropped during the statistical transformation (ggplot2
# warns about it), which is the problem the following steps investigate.
package_details %>%
  pull(Maintainer) %>%
  # first run of word characters in the Maintainer field = first name
  str_extract("\\w+") %>%
  gender::gender() %>%
  ggplot(mapping = aes(x = proportion_male, fill = proportion_male)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Print the definition of geom_histogram() to see which stat and geom the
# layer is assembled from.
geom_histogram
## function (mapping = NULL, data = NULL, stat = "bin", position = "stack",
## ..., binwidth = NULL, bins = NULL, na.rm = FALSE, orientation = NA,
## show.legend = NA, inherit.aes = TRUE)
## {
## layer(data = data, mapping = mapping, stat = stat, geom = GeomBar,
## position = position, show.legend = show.legend, inherit.aes = inherit.aes,
## params = list2(binwidth = binwidth, bins = bins, na.rm = na.rm,
## orientation = orientation, pad = FALSE, ...))
## }
## <bytecode: 0x7fe5508905f0>
## <environment: namespace:ggplot2>
# Show the default aesthetics of GeomBar (including the default `fill`),
# i.e. the values used when no mapping survives for an aesthetic.
GeomBar$default_aes
## Aesthetic mapping:
## * `colour` -> NA
## * `fill` -> "grey35"
## * `linewidth` -> 0.5
## * `linetype` -> 1
## * `alpha` -> NA
# Which variables does the stat create? Inspect the layer data returned
# after the statistical transformation; these are the columns available to
# after_stat() in an aesthetic mapping.
ggtrace::layer_after_stat()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## ✔ Executed `ggtrace_inspect_return(last_plot(), ggplot2:::Layer$compute_statistic)`
## # A tibble: 30 × 11
## count x xmin xmax width density ncount ndensity flipped_aes PANEL
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <fct>
## 1 1439 0 -0.0172 0.0172 0.0345 2.78 0.128 0.128 FALSE 1
## 2 88 0.0345 0.0172 0.0517 0.0345 0.170 0.00785 0.00785 FALSE 1
## 3 87 0.0690 0.0517 0.0862 0.0345 0.168 0.00776 0.00776 FALSE 1
## 4 80 0.103 0.0862 0.121 0.0345 0.154 0.00713 0.00713 FALSE 1
## 5 131 0.138 0.121 0.155 0.0345 0.253 0.0117 0.0117 FALSE 1
## 6 38 0.172 0.155 0.190 0.0345 0.0733 0.00339 0.00339 FALSE 1
## 7 18 0.207 0.190 0.224 0.0345 0.0347 0.00161 0.00161 FALSE 1
## 8 44 0.241 0.224 0.259 0.0345 0.0849 0.00392 0.00392 FALSE 1
## 9 129 0.276 0.259 0.293 0.0345 0.249 0.0115 0.0115 FALSE 1
## 10 22 0.310 0.293 0.328 0.0345 0.0424 0.00196 0.00196 FALSE 1
## # ℹ 20 more rows
## # ℹ 1 more variable: group <int>
# Final plot: reuse the previous histogram but map fill to a variable that
# exists *after* binning (`x`, the bin position computed by stat_bin()), so
# the fill aesthetic is no longer dropped.
last_plot() +
  aes(fill = after_stat(x)) +
  scale_fill_gradientn(colors = c("blue", "grey", "orange")) +
  # geom_fulcrum() is an unexported helper, hence the `:::` access
  ma206distributions:::geom_fulcrum() +
  labs(
    fill = "Proportion",
    title = "A look at the Shiny-dependent packages' maintainers",
    subtitle = "For the 18,474 packages: are maintainers' first names usually given to male or female children?"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.