Intro {ggedgelist} idea

Here we think about a shortcut to a first look at networks, using a flat edge-list input that leads straight into a ggplot2 (ggraph) plot space; this uses tidygraph and ggraph under the hood…

Step 00 Before getting into it, create an ‘interesting’ edge list

library(tidyverse)
library(tidygraph)
library(ggraph)

set.seed(12345)

# Draw 500 random letter pairs, tally repeated pairs into `n`, and keep a
# random 18 of the resulting weighted edges.
# NOTE(review): as_tbl_graph() is documented to use the first two columns as
# from/to when no columns carry those names, so `node_to` would act as the
# edge source here -- confirm the column names match the intended direction.
edge_list <- 
  data.frame(node_to = sample(rep(LETTERS[1:10], 50), 
                              replace = TRUE), 
             node_from = sample(rep(LETTERS[1:10], 50),
                                replace = TRUE)) %>% 
  arrange(node_to, node_from) %>% 
  count(node_to, node_from) %>% 
  sample_n(18)

head(edge_list)
##   node_to node_from n
## 1       C         H 3
## 2       G         H 6
## 3       I         G 3
## 4       D         E 8
## 5       F         A 4
## 6       G         E 6

Step 0. What's the status quo for edgelist -> network viz?

# Status quo: coerce the edge list to a tbl_graph by hand, open a ggraph
# canvas, then layer edges, node markers and node name labels.
edge_list %>%
  as_tbl_graph() %>%
  ggraph() +
  geom_edge_link(color = "orange") +
  geom_node_point(
    alpha = .8,
    color = "steelblue",
    size = 9
  ) +
  geom_node_text(aes(label = name))
## Using "stress" as default layout
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Add a second edge layer on top of the previous plot, mapping the edge
# width to the edge count `n`
last_plot() +
  geom_edge_link(
    aes(edge_width = n),
    alpha = .5,
    color = "red"
  )

Step 0.a And to also visualize node attributes

# Node attribute table: one row per letter A-J plus a random logical flag
node_info <- data.frame(
  my_nodes = LETTERS[1:10],
  ind_child = sample(c(TRUE, FALSE), 10, replace = TRUE)
)

# Status quo with node attributes: the node table's key column has to be
# renamed to `name` by hand before it can be joined onto the graph's nodes.
# The joined `ind_child` flag is then mapped to the node colour so the
# attribute is actually shown in the plot.
edge_list %>% 
  as_tbl_graph() %>%
  left_join(node_info %>% 
              rename(name = my_nodes)) %>%
  ggraph() +
  geom_edge_link(color = "orange") +
  geom_node_point(aes(color = ind_child),
                  size = 9,
                  alpha = .8) + 
  geom_node_text(aes(label = name))
## Joining with `by = join_by(name)`
## Using "stress" as default layout

Proposed functions, edgelist -> plotspace feel

#' Get into ggplot2 (ggraph) plot space from an edge list data frame
#'
#' Converts `edgelist` to a tbl_graph with `as_tbl_graph()` and hands the
#' result to `ggraph()`, so `geom_edge_*()` / `geom_node_*()` layers can be
#' added with `+`.
#'
#' @param edgelist Data frame of edges, passed to `as_tbl_graph()`.
#' @param nodelist Optional data frame of node attributes. Its first column
#'   is renamed to `name` and used as the key for a full join onto the
#'   graph's nodes.
#' @param ... Passed on to `ggraph()` (for example `layout = "kk"`).
#' @return The object returned by `ggraph()`.
ggedgelist <- function(edgelist, nodelist = NULL, ...) {

  graph <- as_tbl_graph(edgelist)

  # join on node attributes if they are available
  if (!is.null(nodelist)) {
    stopifnot(is.data.frame(nodelist), ncol(nodelist) >= 1)

    # the node table built from the edge list identifies nodes by `name`
    names(nodelist)[1] <- "name"

    # explicit key: avoids the "Joining with `by = ...`" message
    graph <- full_join(graph, nodelist, by = "name")
  }

  ggraph(graph, ...)
}

#' Get a full default network plot from an edge list data frame
#'
#' Builds the plot with `ggedgelist()` and adds orange edge links and
#' steel-blue node points; optionally labels each node with its `name`.
#'
#' @param edgelist Data frame of edges, passed to `ggedgelist()`.
#' @param nodelist Optional data frame of node attributes, passed to
#'   `ggedgelist()`.
#' @param include_names If `TRUE`, add a `geom_node_label()` layer showing
#'   each node's `name`.
#' @param ... Passed on to `ggedgelist()`.
#' @return The plot object, with the label layer added when requested.
ggedgelist_quick <- function(edgelist, nodelist = NULL,
                             include_names = FALSE, ...) {

  p <- ggedgelist(edgelist = edgelist,
                  nodelist = nodelist, ...) +
    geom_edge_link(color = "orange") +
    geom_node_point(size = 9,
                    color = "steelblue",
                    alpha = .8)

  if (include_names) {
    p <- p + geom_node_label(aes(label = name))
  }

  p
}



#' Node label layer that uses the node `name` as the label
#'
#' @param ... Passed on to `geom_node_label()`.
#' @return The layer returned by `geom_node_label()`.
geom_node_label_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_label(name_mapping, ...)
}

#' Node text layer that uses the node `name` as the label
#'
#' @param ... Passed on to `geom_node_text()`.
#' @return The layer returned by `geom_node_text()`.
geom_node_text_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_text(name_mapping, ...)
}

examples w/ proposed functions

# `edgelist` has no default, so the function must be given an edge list
ggedgelist_quick(edge_list)

head(edge_list)
##   node_to node_from n
## 1       C         H 3
## 2       G         H 6
## 3       I         G 3
## 4       D         E 8
## 5       F         A 4
## 6       G         E 6
head(node_info)
##   my_nodes ind_child
## 1        A     FALSE
## 2        B      TRUE
## 3        C      TRUE
## 4        D     FALSE
## 5        E      TRUE
## 6        F     FALSE
# Default quick plot: edges and node points only
edge_list %>% 
  ggedgelist_quick() 

# Same plot with node name labels
edge_list %>% 
  ggedgelist_quick(include_names = TRUE) 

# Join node attributes, then colour an extra point layer by `ind_child`
edge_list %>% 
  ggedgelist_quick(nodelist = node_info) + 
  geom_node_point(aes(color = ind_child), size = 10)

# `edgelist` has no default, so the function must be given an edge list
ggedgelist(edge_list) + geom_node_label_auto()

# Customised plot: "kk" layout, dashed edges with arrows, tiles as node
# markers, and node name labels
ggedgelist(edge_list, layout = "kk") +
  geom_edge_link(linetype = "dashed", arrow = arrow()) +
  geom_node_tile(height = .5, width = .18) +
  geom_node_label_auto()

using the ggflowchart example (100% inspiration)

# Edge list from the ggflowchart example, written column-wise:
# each position pairs a `from` node with its `to` node
ggflowchart_example <- tibble(
  from = c("A", "A", "A", "B", "C", "F"),
  to   = c("B", "C", "D", "E", "F", "G")
)

# Print the tbl_graph built from the flowchart edge list
as_tbl_graph(ggflowchart_example)
## # A tbl_graph: 7 nodes and 6 edges
## #
## # A rooted tree
## #
## # A tibble: 7 × 1
##   name 
##   <chr>
## 1 A    
## 2 B    
## 3 C    
## 4 F    
## 5 D    
## 6 E    
## # ℹ 1 more row
## #
## # A tibble: 6 × 2
##    from    to
##   <int> <int>
## 1     1     2
## 2     1     3
## 3     1     5
## # ℹ 3 more rows

Start by using the quick plot function ‘ggedgelist_quick’

# Quick plot with the "stress" layout and node name labels
ggflowchart_example %>% 
  ggedgelist_quick(layout = "stress", 
                   include_names = TRUE)

# Same data with the "tree" layout
ggflowchart_example %>% 
  ggedgelist_quick(layout = "tree", 
                   include_names = TRUE)

# "auto", which is the default, also produces a tree layout in this case
ggedgelist_quick(ggflowchart_example, layout = "auto")

# Inspect the data computed for the second layer (the node points) of the
# plot that was just drawn
layer_data(plot = last_plot(), i = 2)
##    x y PANEL group shape    colour size fill alpha stroke
## 1  0 3     1    -1    19 steelblue    9   NA   0.8    0.5
## 2 -1 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 3  0 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 4  1 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 5  0 1     1    -1    19 steelblue    9   NA   0.8    0.5
## 6 -1 1     1    -1    19 steelblue    9   NA   0.8    0.5
## 7  0 0     1    -1    19 steelblue    9   NA   0.8    0.5

Use ggedgelist and geom_edge_* and geom_node_* functions to customize

# Start from the bare plot space and choose every layer explicitly
ggedgelist(ggflowchart_example) +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(alpha = .2, size = 12) +
  geom_node_label_auto(fill = "magenta")
## Using "tree" as default layout

Capabilities end here. If you need to access powerful network calculation capabilities, step back into the tidygraph world!

# Drop down to tidygraph to compute a node-level measure (degree
# centrality), then size the node points by it
ggflowchart_example %>%
  as_tbl_graph() %>%
  mutate(dg_cent = centrality_degree()) %>%
  ggraph(layout = "stress") +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(aes(size = dg_cent), alpha = .2) +
  scale_size(range = c(8, 15)) +
  geom_node_label_auto(fill = "magenta")

use ggflowchart to make it even faster and prettier

allows for node info, which currently isn’t in proposal…

library(ggflowchart)
# Node attributes for the flowchart: A-D are "Type 1", E-G are "Type 2"
node_data <- tibble::tibble(
  name = LETTERS[1:7],
  type = rep(c("Type 1", "Type 2"), times = c(4, 3))
)

# Draw the flowchart, filling each node by its type
ggflowchart(ggflowchart_example, node_data, fill = type)

# corrr example…

# Reference point: corrr's own network plot of the airquality correlations
corrr::network_plot(
  corrr::correlate(datasets::airquality),
  min_cor = .2
)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'

# One node per airquality variable
node_list <- data.frame(x = names(datasets::airquality))

# Reshape the shaved correlation table into one row per variable pair and
# keep pairs with |correlation| >= .2 (filter() also drops rows whose
# value is NA)
corrr_edgelist <- datasets::airquality %>% 
  corrr::correlate() %>% 
  corrr::shave() %>% 
  pivot_longer(-1) %>% 
  filter(abs(value) >= .2)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
# using ggedgelist_quick, then adding arcs whose width follows the
# absolute correlation
corrr_edgelist %>% 
  ggedgelist_quick(include_names = TRUE, 
                   layout = "fr", 
                   nodelist = node_list) + 
  geom_edge_arc(aes(edge_width = abs(value)), 
                alpha = .2)
## Joining with `by = join_by(name)`

# customize using ggedgelist: arcs carry both the strength (width) and the
# sign (colour) of each correlation
ggedgelist(corrr_edgelist, layout = "fr") +
  geom_edge_arc(
    aes(color = value, edge_width = abs(value)),
    strength = .3
  ) +
  geom_node_point() +
  geom_node_label_auto() +
  scale_edge_colour_gradient2()

library(babynames)
set.seed(12145)
# 19 random name pairs, each name drawn with probability proportional to
# its count `n` in babynames
project_partners <- data.frame(
  x = sample(babynames$name, 19, prob = babynames$n), 
  y = sample(babynames$name, 19, prob = babynames$n)
)

project_partners %>% 
  ggedgelist_quick(layout = "fr", 
                   include_names = TRUE)

# Download the CRAN package table from the TidyTuesday 2023-09-19 data set
cran_20230905 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_20230905.csv')
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 19838 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (51): Package, Version, Priority, Depends, Imports, LinkingTo, Suggests...
## lgl  (15): License_is_FOSS, License_restricts_use, BuildKeepEmpty, BuildManu...
## date  (1): Published
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Download the package/author pairs (columns Package, authorsR) from the
# same TidyTuesday data set
package_authors <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/package_authors.csv')
## Rows: 51281 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Package, authorsR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Download the precomputed graph node table from the same data set
# NOTE(review): not referenced again in the visible part of this file
cran_graph_nodes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_nodes.csv')
## Rows: 15419 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): x, y, dist2HW, cc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Download the precomputed graph edge table from the same data set
# NOTE(review): not referenced again in the visible part of this file
cran_graph_edges <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_edges.csv')
## Rows: 126988 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): from, to, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Packages whose name starts with "gg" and whose Depends field mentions
# ggplot2, linked to their authors. The first space in each author name is
# replaced with a line break before plotting. A package can match several
# author rows, so the join is expected to return more rows than it receives.
cran_20230905 %>% 
  filter(str_detect(Package, "^gg")) %>% 
  filter(str_detect(Depends, "ggplot2")) %>% 
  select(Package) %>% 
  left_join(package_authors %>% mutate(authorsR = authorsR %>% str_replace(" ", "\n"))) %>% 
  ggedgelist_quick(layout = "fr", 
                   include_names = TRUE)
## Joining with `by = join_by(Package)`
## Warning in left_join(., package_authors %>% mutate(authorsR = authorsR %>% : Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 2 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
##   warning.

# Authors that appear on at least 10 package/author rows
crans_prolific <- package_authors %>% 
  count(authorsR) %>% 
  filter(n >= 10)

# Keep only the package/author rows that belong to those authors
# (the join also carries the author's count `n` along)
package_authors_prolific <- package_authors %>% 
  inner_join(crans_prolific)
## Joining with `by = join_by(authorsR)`
# Keep packages with at least 10 prolific authors, drop the grouping again
# so a plain data frame is handed on, then plot packages against authors
package_authors_prolific %>% 
  group_by(Package) %>% 
  filter(n() >= 10) %>% 
  ungroup() %>% 
  ggedgelist_quick(include_names = TRUE, layout = "fr") + 
  labs(title = "Which CRAN packages have at least 10 very prolific CRAN developers as authors?",
       caption = "Where prolific means authoring 10 or more R packages on CRAN")