Intro {ggedgelist} idea
Step 00 Before getting into it, create an ‘interesting’ edge list
Step 0. What’s the status quo edgelist -> network viz
Step 0.a And to also visualize node attributes
Proposed functions, edgelist -> plotspace feel
examples w/ proposed functions
- ggedgelist_quick()
- ggedgelist() + geom_node_label_auto()
using the ggflowchart example (100% inspiration)

Intro {ggedgelist} idea

Here we think about a shortcut to a first look at networks using a flat edge list input that leads straight to a ggplot2 (ggraph) plot space; this uses tidygraph and ggraph under the hood…

Step 00 Before getting into it, create an ‘interesting’ edge list

library(tidyverse)
library(tidygraph)
library(ggraph)

# Fix the RNG seed so the sampled edge list is reproducible.
set.seed(12345)

# Build a weighted edge list: draw 500 random (to, from) letter pairs,
# collapse duplicate pairs into a count `n`, then keep a random 18 of them.
edge_list <-
  data.frame(
    node_to = sample(rep(LETTERS[1:10], 50), replace = TRUE),
    node_from = sample(rep(LETTERS[1:10], 50), replace = TRUE)
  ) %>%
  arrange(node_to, node_from) %>%
  count(node_to, node_from) %>%
  sample_n(18)

head(edge_list)

##   node_to node_from n
## 1       C         H 3
## 2       G         H 6
## 3       I         G 3
## 4       D         E 8
## 5       F         A 4
## 6       G         E 6

Step 0. What’s the status quo edgelist -> network viz

# Status quo: convert the edge list to a tbl_graph, hand it to ggraph(),
# then add the edge, node-point and node-text layers by hand.
ggraph(as_tbl_graph(edge_list)) +
  geom_edge_link(color = "orange") +
  geom_node_point(
    size = 9,
    color = "steelblue",
    alpha = .8
  ) +
  geom_node_text(aes(label = name))

## Using "stress" as default layout

## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Aesthetic mapping for edge characteristics: add a second edge layer whose
# width is mapped to the edge count `n`.
last_plot() +
  geom_edge_link(
    aes(edge_width = n),
    color = "red",
    alpha = .5
  )

Step 0.a And to also visualize node attributes

# Node-level attributes: one row per node, keyed by `my_nodes`, with a
# randomly drawn logical flag `ind_child`.
node_info <- data.frame(
  my_nodes = LETTERS[1:10],
  ind_child = sample(c(TRUE, FALSE), 10, replace = TRUE)
)

# Attach the node attributes to the graph. The key column is renamed to
# `name` so that it lines up with the `name` column of the graph's nodes.
edge_list %>% 
  as_tbl_graph() %>%
  left_join(node_info %>% 
              rename(name = my_nodes)) %>%
  ggraph() +
  geom_edge_link(color = "orange") +
  geom_node_point(size = 9,
                  color = "steelblue",
                  alpha = .8) + 
  geom_node_text(aes(label = name))

## Joining with `by = join_by(name)`
## Using "stress" as default layout

Proposed functions, edgelist -> plotspace feel

#' Get into ggplot2 (ggraph) plot space from an edge list data frame
#'
#' Converts the edge list with `as_tbl_graph()`, optionally joins node
#' attributes onto it, and starts a `ggraph()` plot. The node identifier in
#' the resulting graph is a column called `name`.
#'
#' @param edgelist A data frame of edges; passed to `as_tbl_graph()`.
#' @param nodelist Optional data frame of node attributes. Its first column
#'   is treated as the node identifier and is renamed to `name` before being
#'   joined to the graph with `full_join()`.
#' @param ... Passed on to `ggraph()` (for example `layout = "kk"`).
#' @return The object returned by `ggraph()`, ready for `geom_edge_*()` and
#'   `geom_node_*()` layers.
ggedgelist <- function(edgelist, nodelist = NULL, ...) {
  graph <- as_tbl_graph(edgelist)

  # join on node attributes if they are available
  if (!is.null(nodelist)) {
    names(nodelist)[1] <- "name"
    graph <- full_join(graph, nodelist)
  }

  ggraph(graph, ...)
}

#' Get a full network viz from an edge list data frame only
#'
#' Builds on `ggedgelist()` and adds a default edge layer (orange links) and
#' a default node layer (large steel-blue points).
#'
#' @param edgelist A data frame of edges; passed to `ggedgelist()`.
#' @param nodelist Optional data frame of node attributes; passed to
#'   `ggedgelist()`.
#' @param include_names If `TRUE`, also add a `geom_node_label()` layer that
#'   shows each node's `name`.
#' @param ... Passed on to `ggedgelist()`, which forwards it to `ggraph()`.
#' @return A plot object to which further layers can be added with `+`.
ggedgelist_quick <- function(edgelist, nodelist = NULL,
                             include_names = FALSE, ...) {
  p <- ggedgelist(edgelist = edgelist, nodelist = nodelist, ...) +
    geom_edge_link(color = "orange") +
    geom_node_point(size = 9, color = "steelblue", alpha = .8)

  if (include_names) {
    p <- p + geom_node_label(aes(label = name))
  }

  p
}



#' Node labels with the `name` mapping pre-filled
#'
#' @param ... Passed on to `geom_node_label()`.
#' @return The layer returned by `geom_node_label()`.
geom_node_label_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_label(name_mapping, ...)
}

#' Node text with the `name` mapping pre-filled
#'
#' @param ... Passed on to `geom_node_text()`.
#' @return The layer returned by `geom_node_text()`.
geom_node_text_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_text(name_mapping, ...)
}

examples w/ proposed functions

`ggedgelist_quick()`

# Preview the first rows of the edge list used in the examples below.
head(edge_list)

##   node_to node_from n
## 1       C         H 3
## 2       G         H 6
## 3       I         G 3
## 4       D         E 8
## 5       F         A 4
## 6       G         E 6

# Preview the first rows of the node attribute table used in the examples.
head(node_info)

##   my_nodes ind_child
## 1        A     FALSE
## 2        B      TRUE
## 3        C      TRUE
## 4        D     FALSE
## 5        E      TRUE
## 6        F     FALSE

# Edges and nodes only, no labels.
edge_list %>%
  ggedgelist_quick()

# Same plot with node name labels.
edge_list %>%
  ggedgelist_quick(include_names = TRUE)

# With node attributes joined in, so they can be mapped in later layers.
edge_list %>%
  ggedgelist_quick(nodelist = node_info) +
  geom_node_point(aes(color = ind_child), size = 10)

`ggedgelist()` + `geom_node_label_auto()`

# Start from ggedgelist() with the "kk" layout and choose every layer by
# hand: dashed arrows, tile-shaped nodes and automatic name labels.
ggedgelist(edge_list, layout = "kk") +
  geom_edge_link(
    arrow = arrow(),
    linetype = "dashed"
  ) +
  geom_node_tile(width = .18, height = .5) +
  geom_node_label_auto()

using the ggflowchart example (100% inspiration)

# flowcharter example
# Flowchart edge list: one row per edge, with `from` and `to` columns.
ggflowchart_example <- tibble(
  from = c("A", "A", "A", "B", "C", "F"),
  to = c("B", "C", "D", "E", "F", "G")
)

# Print the tbl_graph built from the edge list.
as_tbl_graph(ggflowchart_example)

## # A tbl_graph: 7 nodes and 6 edges
## #
## # A rooted tree
## #
## # A tibble: 7 × 1
##   name 
##   <chr>
## 1 A    
## 2 B    
## 3 C    
## 4 F    
## 5 D    
## 6 E    
## # ℹ 1 more row
## #
## # A tibble: 6 × 2
##    from    to
##   <int> <int>
## 1     1     2
## 2     1     3
## 3     1     5
## # ℹ 3 more rows

Start by using the quick plot function ‘ggedgelist_quick’

ggflowchart_example %>%
  ggedgelist_quick(layout = "stress",
                   include_names = TRUE)

ggflowchart_example %>%
  ggedgelist_quick(layout = "tree",
                   include_names = TRUE)

# "auto", which is also the default, produces a tree layout in this case
ggflowchart_example %>%
  ggedgelist_quick(layout = "auto")

# Inspect the data behind the second layer of the last plot (the node
# points added by ggedgelist_quick()).
layer_data(last_plot(), i = 2)

##    x y PANEL group shape    colour size fill alpha stroke
## 1  0 3     1    -1    19 steelblue    9   NA   0.8    0.5
## 2 -1 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 3  0 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 4  1 2     1    -1    19 steelblue    9   NA   0.8    0.5
## 5  0 1     1    -1    19 steelblue    9   NA   0.8    0.5
## 6 -1 1     1    -1    19 steelblue    9   NA   0.8    0.5
## 7  0 0     1    -1    19 steelblue    9   NA   0.8    0.5

Use ggedgelist and geom_edge_* and geom_node_* functions to customize

# No layout given, so ggraph() picks its default for this graph.
ggedgelist(ggflowchart_example) +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(alpha = .2, size = 12) +
  geom_node_label_auto(fill = "magenta")

## Using "tree" as default layout

Capabilities end here. If you need to access powerful network calculation capabilities, step back into the tidygraph world!

# Work on the tbl_graph directly: compute degree centrality per node and
# map it to point size.
ggflowchart_example %>%
  as_tbl_graph() %>%
  mutate(dg_cent = centrality_degree()) %>%
  ggraph(layout = "stress") +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(
    aes(size = dg_cent),
    alpha = .2
  ) +
  scale_size(range = c(8, 15)) +
  geom_node_label_auto(fill = "magenta")

use ggflowchart to make it even faster and prettier

allows for node info, which currently isn’t in proposal…

library(ggflowchart)

# Node attributes keyed by `name`: A-D are "Type 1", E-G are "Type 2".
node_data <- tibble::tibble(
  name = LETTERS[1:7],
  type = rep(c("Type 1", "Type 2"), times = c(4, 3))
)

# Draw the flowchart, filling nodes by their `type`.
ggflowchart(ggflowchart_example, node_data, fill = type)

# corrr example…

# Baseline for comparison: corrr's own network plot of the airquality
# correlations, called with min_cor = .2.
corrr::network_plot(
  corrr::correlate(datasets::airquality),
  min_cor = .2
)

## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'

# One node per airquality column.
node_list <- data.frame(x = names(datasets::airquality))

# Turn the shaved correlation table into a long edge list and keep only the
# pairs whose absolute correlation is at least .2.
corrr_edgelist <- datasets::airquality %>%
  corrr::correlate() %>%
  corrr::shave() %>%
  pivot_longer(-1) %>%
  filter(abs(value) >= .2)

## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'

# using ggedgelist_quick; the extra arc layer maps edge width to the
# absolute correlation
corrr_edgelist %>% 
  ggedgelist_quick(include_names = TRUE, 
                   layout = "fr", 
                   nodelist = node_list) + 
  geom_edge_arc(aes(edge_width = abs(value)), 
                alpha = .2)

## Joining with `by = join_by(name)`

# customize using ggedgelist: arcs sized by absolute correlation and
# coloured by the signed correlation
ggedgelist(corrr_edgelist, layout = "fr") +
  geom_edge_arc(
    aes(
      edge_width = abs(value),
      color = value
    ),
    strength = .3
  ) +
  geom_node_point() +
  geom_node_label_auto() +
  scale_edge_colour_gradient2()

library(babynames)
set.seed(12145)

# Two independent draws of 19 names, each weighted by `babynames$n`, paired
# row-wise to form an edge list.
project_partners <- data.frame(
  x = sample(babynames$name, 19, prob = babynames$n),
  y = sample(babynames$name, 19, prob = babynames$n)
)

project_partners %>%
  ggedgelist_quick(layout = "fr",
                   include_names = TRUE)

# Download the CRAN package table from the TidyTuesday 2023-09-19 data set
# (one row per package; includes the `Package` and `Depends` columns used
# further down).
cran_20230905 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_20230905.csv')

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 19838 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (51): Package, Version, Priority, Depends, Imports, LinkingTo, Suggests...
## lgl  (15): License_is_FOSS, License_restricts_use, BuildKeepEmpty, BuildManu...
## date  (1): Published
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Download the package/author pairs (columns `Package` and `authorsR`) from
# the same TidyTuesday data set.
package_authors <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/package_authors.csv')

## Rows: 51281 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Package, authorsR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Download the node table of the TidyTuesday CRAN graph.
# NOTE(review): not referenced by the examples visible in this document.
cran_graph_nodes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_nodes.csv')

## Rows: 15419 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): x, y, dist2HW, cc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Download the edge table of the TidyTuesday CRAN graph.
# NOTE(review): not referenced by the examples visible in this document.
cran_graph_edges <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_edges.csv')

## Rows: 126988 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): from, to, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Packages whose name starts with "gg" and whose Depends field mentions
# ggplot2, joined to their authors to form a package -> author edge list.
cran_20230905 %>% 
  filter(Package %>% 
           str_detect("^gg")) %>% 
  filter(Depends %>% 
           str_detect("ggplot2")) %>% 
  select(Package) %>% 
  left_join(package_authors %>% mutate(authorsR = authorsR %>% str_replace(" ", "\n"))) %>% 
  ggedgelist_quick(layout = "fr", 
                   include_names = TRUE)

## Joining with `by = join_by(Package)`

## Warning in left_join(., package_authors %>% mutate(authorsR = authorsR %>% : Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 2 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
##   warning.

# Authors listed at least 10 times in `package_authors`.
crans_prolific <- package_authors %>%
  count(authorsR) %>%
  filter(n >= 10)

# Keep only the package/author rows that belong to those authors.
package_authors_prolific <- package_authors %>%
  inner_join(crans_prolific)

## Joining with `by = join_by(authorsR)`

# Keep packages that have at least 10 prolific authors, then drop the
# grouping so a plain (ungrouped) data frame reaches the plotting helper.
package_authors_prolific %>%
  group_by(Package) %>%
  filter(n() >= 10) %>%
  ungroup() %>%
  ggedgelist_quick(include_names = TRUE, layout = "fr") +
  labs(title = "Which CRAN packages have at least 10 very prolific CRAN developers as authors",
       caption = "Where prolific means authoring 10 or more R packages on CRAN")

ggedgelist proposal

Evangeline Reynolds

9/20/2023

Intro {ggedgelist} idea

Step 00 Before getting into it, create an ‘interesting’ edge list

Step 0. What’s the status quo edgelist -> network viz

Step 0.a And to also visualize node attributes

Proposed functions, edgelist -> plotspace feel

examples w/ proposed functions

`ggedgelist_quick()`

`ggedgelist()` + `geom_node_label_auto()`

using the ggflowchart example (100% inspiration)

Start by using the quick plot function ‘ggedgelist_quick’

Use ggedgelist and geom_edge_* and geom_node_* functions to customize

Capabilities end here. If you need to access powerful network calculation capabilities, step back into the tidygraph world!

use ggflowchart to make it even faster and prettier

ggedgelist proposal

Evangeline Reynolds

9/20/2023

Intro {ggedgelist} idea

Step 00 Before getting into it, create an ‘interesting’ edge list

Step 0. What’s the status quo edgelist -> network viz

Step 0.a And to also visualize node attributes

Proposed functions, edgelist -> plotspace feel

examples w/ proposed functions

ggedgelist_quick()

ggedgelist() + geom_node_label_auto()

using the ggflowchart example (100% inspiration)

Start by using the quick plot function ‘ggedgelist_quick’

Use ggedgelist and geom_edge_* and geom_node_* functions to customize

Capabilities end here. If you need to access powerful network calculation capabilities, step back into the tidygraph world!

use ggflowchart to make it even faster and prettier

`ggedgelist_quick()`

`ggedgelist()` + `geom_node_label_auto()`