Here we think about a shortcut to a first look at networks: a flat edge-list input that leads straight into a ggplot2 (ggraph) plot space. This uses tidygraph and ggraph under the hood…
library(tidyverse)
library(tidygraph)
library(ggraph)
# Simulate a small weighted edge list: draw random letter pairs, tally how
# often each (node_to, node_from) pair occurs into `n`, then keep 18 of the
# tallied pairs at random. The seed makes the draw reproducible.
set.seed(12345)
edge_list <-
  data.frame(
    node_to = sample(rep(LETTERS[1:10], 50), replace = TRUE),
    node_from = sample(rep(LETTERS[1:10], 50), replace = TRUE)
  ) %>%
  arrange(node_to, node_from) %>%
  count(node_to, node_from) %>%
  sample_n(18)
head(edge_list)
## node_to node_from n
## 1 C H 3
## 2 G H 6
## 3 I G 3
## 4 D E 8
## 5 F A 4
## 6 G E 6
# Baseline network plot: convert the edge list to a tbl_graph, open a ggraph
# canvas on it, then layer edges, node points and node name labels.
ggraph(as_tbl_graph(edge_list)) +
  geom_edge_link(color = "orange") +
  geom_node_point(alpha = .8, color = "steelblue", size = 9) +
  geom_node_text(aes(label = name))
## Using "stress" as default layout
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Aesthetic mapping for edge characteristics: add a second edge layer to the
# previous plot whose width is driven by the edge column `n`.
last_plot() +
  geom_edge_link(aes(edge_width = n), alpha = .5, color = "red")
# Node-level attributes: one row per letter with a random logical `ind_child`.
node_info <- data.frame(
  my_nodes = LETTERS[1:10],
  ind_child = sample(c(TRUE, FALSE), 10, replace = TRUE)
)

# Attach the attributes to the graph's nodes. The key column is renamed to
# `name` first so the natural join below matches on it.
edge_list %>%
  as_tbl_graph() %>%
  left_join(rename(node_info, name = my_nodes)) %>%
  ggraph() +
  geom_edge_link(color = "orange") +
  geom_node_point(size = 9, color = "steelblue", alpha = .8) +
  geom_node_text(aes(label = name))
## Joining with `by = join_by(name)`
## Using "stress" as default layout
#' Get into ggplot2 (ggraph) plot space from an edge list data frame
#'
#' Converts `edgelist` with `as_tbl_graph()` and passes the graph to
#' `ggraph()`. When `nodelist` is supplied, its first column is renamed to
#' `name` and the table is full-joined onto the graph before plotting.
#'
#' @param edgelist A data frame of edges, handed to `as_tbl_graph()`.
#' @param nodelist Optional data frame of node attributes whose first column
#'   identifies the node. `NULL` (the default) skips the join.
#' @param ... Further arguments forwarded to `ggraph()`, e.g. `layout = "kk"`.
#' @return The value of `ggraph()`; add geoms to it with `+`.
ggedgelist <- function(edgelist, nodelist = NULL, ...) {
  # message("'name' a variable created in the 'nodes' dataframe")
  graph <- as_tbl_graph(edgelist)

  if (!is.null(nodelist)) {
    # join on node attributes if they are available; the first column is
    # renamed so it lines up with the graph's `name` column
    names(nodelist)[1] <- "name"
    graph <- full_join(graph, nodelist)
  }

  ggraph(graph, ...)
}
#' Get a full visualization from an edge list data frame only
#'
#' Builds the `ggedgelist()` canvas and adds an orange edge layer and
#' steel-blue node points; optionally adds node name labels on top.
#'
#' @param edgelist A data frame of edges, passed to `ggedgelist()`.
#' @param nodelist Optional data frame of node attributes, passed to
#'   `ggedgelist()`.
#' @param include_names If `TRUE`, add `geom_node_label()` labelled by the
#'   node `name`. Defaults to `FALSE`.
#' @param ... Further arguments forwarded to `ggedgelist()`, e.g. `layout`.
#' @return The plot object, with or without the label layer.
ggedgelist_quick <- function(edgelist,
                             nodelist = NULL,
                             include_names = FALSE,
                             ...) {
  p <- ggedgelist(edgelist = edgelist, nodelist = nodelist, ...) +
    geom_edge_link(color = "orange") +
    geom_node_point(size = 9, color = "steelblue", alpha = .8)

  if (include_names) {
    p <- p + geom_node_label(aes(label = name))
  }

  p
}
#' Node label layer that is always labelled by the node `name`
#'
#' @param ... Further arguments forwarded to `geom_node_label()`.
#' @return The layer returned by `geom_node_label()`.
geom_node_label_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_label(name_mapping, ...)
}
#' Node text layer that is always labelled by the node `name`
#'
#' @param ... Further arguments forwarded to `geom_node_text()`.
#' @return The layer returned by `geom_node_text()`.
geom_node_text_auto <- function(...) {
  name_mapping <- aes(label = name)
  geom_node_text(name_mapping, ...)
}
# ggedgelist_quick() examples
# The two inputs used below: the edge list and the node attribute table
head(edge_list)
## node_to node_from n
## 1 C H 3
## 2 G H 6
## 3 I G 3
## 4 D E 8
## 5 F A 4
## 6 G E 6
head(node_info)
## my_nodes ind_child
## 1 A FALSE
## 2 B TRUE
## 3 C TRUE
## 4 D FALSE
## 5 E TRUE
## 6 F FALSE
# Edge list only
edge_list %>%
  ggedgelist_quick()
# Edge list with node names drawn as labels
edge_list %>%
  ggedgelist_quick(include_names = TRUE)
# Edge list plus node attributes; an extra point layer is coloured by the
# joined `ind_child` column
edge_list %>%
  ggedgelist_quick(nodelist = node_info) +
  geom_node_point(aes(color = ind_child), size = 10)
# ggedgelist() + geom_node_label_auto() examples
# Build a custom plot on the bare ggedgelist() canvas using the "kk" layout:
# dashed edges with arrows, tile-shaped nodes, and automatic name labels.
edge_list %>%
  ggedgelist(layout = "kk") +
  geom_edge_link(linetype = "dashed", arrow = arrow()) +
  geom_node_tile(height = .5, width = .18) +
  geom_node_label_auto()
# flowcharter example: a small hand-written edge list with `from` and `to`
# columns, then a look at how it converts to a tbl_graph
ggflowchart_example <- tribble(
  ~from, ~to,
  "A",   "B",
  "A",   "C",
  "A",   "D",
  "B",   "E",
  "C",   "F",
  "F",   "G"
)
as_tbl_graph(ggflowchart_example)
## # A tbl_graph: 7 nodes and 6 edges
## #
## # A rooted tree
## #
## # A tibble: 7 × 1
## name
## <chr>
## 1 A
## 2 B
## 3 C
## 4 F
## 5 D
## 6 E
## # ℹ 1 more row
## #
## # A tibble: 6 × 2
## from to
## <int> <int>
## 1 1 2
## 2 1 3
## 3 1 5
## # ℹ 3 more rows
# The same flowchart under explicitly chosen layouts
ggflowchart_example %>%
  ggedgelist_quick(include_names = TRUE, layout = "stress")
ggflowchart_example %>%
  ggedgelist_quick(include_names = TRUE, layout = "tree")
# "auto", which is the default, also produces a tree in this case
ggflowchart_example %>%
  ggedgelist_quick(layout = "auto")
# Inspect the computed data behind layer 2 of the last plot (the node points
# added by ggedgelist_quick())
layer_data(last_plot(), i = 2)
## x y PANEL group shape colour size fill alpha stroke
## 1 0 3 1 -1 19 steelblue 9 NA 0.8 0.5
## 2 -1 2 1 -1 19 steelblue 9 NA 0.8 0.5
## 3 0 2 1 -1 19 steelblue 9 NA 0.8 0.5
## 4 1 2 1 -1 19 steelblue 9 NA 0.8 0.5
## 5 0 1 1 -1 19 steelblue 9 NA 0.8 0.5
## 6 -1 1 1 -1 19 steelblue 9 NA 0.8 0.5
## 7 0 0 1 -1 19 steelblue 9 NA 0.8 0.5
# Custom flowchart on the default layout: dashed edges, large translucent
# node points, and magenta-filled name labels.
ggflowchart_example %>%
  ggedgelist() +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(alpha = .2, size = 12) +
  geom_node_label_auto(fill = "magenta")
## Using "tree" as default layout
# Going through tidygraph directly: compute a degree centrality per node
# (`dg_cent`) and map it to point size on a "stress" layout.
ggflowchart_example %>%
  as_tbl_graph() %>%
  mutate(dg_cent = centrality_degree()) %>%
  ggraph(layout = "stress") +
  geom_edge_link(linetype = "dashed") +
  geom_node_point(aes(size = dg_cent), alpha = .2) +
  scale_size(range = c(8, 15)) +
  geom_node_label_auto(fill = "magenta")
ggflowchart allows for node info, which currently isn’t in this proposal…
library(ggflowchart)
# Node attributes for the flowchart: nodes A-D are "Type 1", E-G are "Type 2"
node_data <- tibble::tibble(
  name = LETTERS[1:7],
  type = rep(c("Type 1", "Type 2"), times = c(4, 3))
)
# ggflowchart takes the edge list and node table, filling nodes by `type`
ggflowchart(ggflowchart_example, node_data, fill = type)
# corrr example: corrr's own network plot of the airquality correlations,
# called with `min_cor = .2`
corrr::network_plot(
  corrr::correlate(datasets::airquality),
  min_cor = .2
)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
# One node per airquality column
node_list <- data.frame(x = names(datasets::airquality))

# Turn the correlation table into a long edge list: one row per variable
# pair, keeping pairs whose correlation is at least .2 in absolute value.
# filter() also drops rows whose `value` is NA.
corrr_edgelist <- datasets::airquality %>%
  corrr::correlate() %>%
  corrr::shave() %>%
  pivot_longer(-1) %>%
  filter(abs(value) >= .2)
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'
# using ggedgelist_quick: default styling plus an arc layer whose width
# follows the absolute correlation
corrr_edgelist %>%
  ggedgelist_quick(
    nodelist = node_list,
    include_names = TRUE,
    layout = "fr"
  ) +
  geom_edge_arc(aes(edge_width = abs(value)), alpha = .2)
## Joining with `by = join_by(name)`
# customize using ggedgelist: arcs sized by absolute correlation and coloured
# by the signed value on a two-sided gradient
corrr_edgelist %>%
  ggedgelist(layout = "fr") +
  geom_edge_arc(
    aes(edge_width = abs(value), color = value),
    strength = .3
  ) +
  geom_node_point() +
  geom_node_label_auto() +
  scale_edge_colour_gradient2()
library(babynames)
# Made-up partner pairs: 19 rows of two names each, drawn from babynames with
# sampling probability proportional to the count column `n`.
set.seed(12145)
project_partners <- data.frame(
  x = sample(babynames$name, 19, prob = babynames$n),
  y = sample(babynames$name, 19, prob = babynames$n)
)
project_partners %>%
  ggedgelist_quick(layout = "fr", include_names = TRUE)
# Read cran_20230905.csv from the TidyTuesday 2023-09-19 data folder
cran_20230905 <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_20230905.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 19838 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (51): Package, Version, Priority, Depends, Imports, LinkingTo, Suggests...
## lgl (15): License_is_FOSS, License_restricts_use, BuildKeepEmpty, BuildManu...
## date (1): Published
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read package_authors.csv from the TidyTuesday 2023-09-19 data folder
package_authors <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/package_authors.csv"
)
## Rows: 51281 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Package, authorsR
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read cran_graph_nodes.csv from the TidyTuesday 2023-09-19 data folder
cran_graph_nodes <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_nodes.csv"
)
## Rows: 15419 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): x, y, dist2HW, cc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read cran_graph_edges.csv from the TidyTuesday 2023-09-19 data folder
cran_graph_edges <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-09-19/cran_graph_edges.csv"
)
## Rows: 126988 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): from, to, weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Package-author network for packages whose name starts with "gg" and whose
# Depends field mentions ggplot2. The first space in each author name is
# replaced by a newline so the labels wrap.
cran_20230905 %>%
  filter(str_detect(Package, "^gg")) %>%
  filter(str_detect(Depends, "ggplot2")) %>%
  select(Package) %>%
  left_join(package_authors %>% mutate(authorsR = authorsR %>% str_replace(" ", "\n"))) %>%
  ggedgelist_quick(layout = "fr", include_names = TRUE)
## Joining with `by = join_by(Package)`
## Warning in left_join(., package_authors %>% mutate(authorsR = authorsR %>% : Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 2 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
# Authors that appear on at least 10 rows of package_authors
crans_prolific <- package_authors %>%
  count(authorsR) %>%
  filter(n >= 10)

# Package-author rows restricted to those prolific authors; the natural join
# also carries the per-author count `n` along
package_authors_prolific <- package_authors %>%
  inner_join(crans_prolific)
## Joining with `by = join_by(authorsR)`
# Keep packages that have at least 10 prolific authors, then drop the
# grouping so a plain (ungrouped) table is handed to the plotting helper.
package_authors_prolific %>%
  group_by(Package) %>%
  filter(n() >= 10) %>%
  ungroup() %>%
  ggedgelist_quick(include_names = TRUE, layout = "fr") +
  labs(
    title = "Which CRAN packages have at least 10 very prolific CRAN developers as authors",
    caption = "Where prolific as 10 or more >=10 R packages on CRAN"
  )