Example simple feature to data frame…
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(sp)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## using a MULTIPOLYGON data set supplied with library(sf)
nc <- sf::st_read(system.file("shape/nc.shp", package="sf"))
## Reading layer `nc' from data source
## `/Library/Frameworks/R.framework/Versions/4.2/Resources/library/sf/shape/nc.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 100 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
## Geodetic CRS: NAD27
nc %>%
ggplot() +
geom_sf(aes(fill = AREA))
# pull out all the data frames that define polygons
nc_polygons <- nc$geometry %>% purrr::flatten() %>% purrr::flatten()
# There's sometimes multiple polygons associated with a row
#
nsubgroup <- nc$geometry %>% purrr::map_dbl(.f = length)
tibble(nsubgroup) %>%
mutate(group = 1:n()) %>%
uncount(weights = nsubgroup) %>%
group_by(group) %>%
mutate(subgroup = 1:n()) %>%
mutate(region = paste0(group,'.', subgroup)) %>%
ungroup() %>%
mutate(polygon = 1:n()) ->
groups_and_polygons_nc
nc_df <- nc
nc_df$geometry <- NULL
str(nc_df)
## 'data.frame': 100 obs. of 14 variables:
## $ AREA : num 0.114 0.061 0.143 0.07 0.153 0.097 0.062 0.091 0.118 0.124 ...
## $ PERIMETER: num 1.44 1.23 1.63 2.97 2.21 ...
## $ CNTY_ : num 1825 1827 1828 1831 1832 ...
## $ CNTY_ID : num 1825 1827 1828 1831 1832 ...
## $ NAME : chr "Ashe" "Alleghany" "Surry" "Currituck" ...
## $ FIPS : chr "37009" "37005" "37171" "37053" ...
## $ FIPSNO : num 37009 37005 37171 37053 37131 ...
## $ CRESS_ID : int 5 3 86 27 66 46 15 37 93 85 ...
## $ BIR74 : num 1091 487 3188 508 1421 ...
## $ SID74 : num 1 0 5 1 9 7 0 0 4 1 ...
## $ NWBIR74 : num 10 10 208 123 1066 ...
## $ BIR79 : num 1364 542 3616 830 1606 ...
## $ SID79 : num 0 3 6 2 3 5 2 2 2 5 ...
## $ NWBIR79 : num 19 12 260 145 1197 ...
nc_df$group <- 1:nrow(nc_df)
polygons_out <- data.frame()
for (i in 1:length(nc_polygons)){
temp <- nc_polygons[[i]] %>%
data.frame() %>%
mutate(polygon = i)
polygons_out = bind_rows(polygons_out, temp)
}
polygons_out %>%
left_join(groups_and_polygons_nc) %>%
left_join(nc_df) %>%
ggplot() +
aes(x = X1, y = X2 , group = polygon, fill = AREA) +
geom_polygon() +
coord_map(projection = "orthographic",
orientation = c(31, -74, -3))
## Joining with `by = join_by(polygon)`
## Joining with `by = join_by(group)`
Write functions for ggnorthcarolina
# imagine you only have nc_df
nc_df %>% head()
## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74
## 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1
## 2 0.061 1.231 1827 1827 Alleghany 37005 37005 3 487 0
## 3 0.143 1.630 1828 1828 Surry 37171 37171 86 3188 5
## 4 0.070 2.968 1831 1831 Currituck 37053 37053 27 508 1
## 5 0.153 2.206 1832 1832 Northampton 37131 37131 66 1421 9
## 6 0.097 1.670 1833 1833 Hertford 37091 37091 46 1452 7
## NWBIR74 BIR79 SID79 NWBIR79 group
## 1 10 1364 0 19 1
## 2 10 542 3 12 2
## 3 208 3616 6 260 3
## 4 123 830 2 145 4
## 5 1066 1606 3 1197 5
## 6 954 1838 5 1237 6
geometries_frame_to_id_group <- function(geometries_frame){
nsubgroup <- geometries_frame$geometry %>% purrr::map_dbl(.f = length)
tibble(nsubgroup) %>%
mutate(mgroup = 1:n()) %>% # row number
uncount(weights = nsubgroup) %>%
group_by(mgroup) %>%
mutate(subgroup = 1:n()) %>%
mutate(region = paste0(mgroup,'.', subgroup)) %>%
ungroup() %>%
mutate(polygon = 1:n()) %>%
ungroup()
}
geometries_frame_to_id_group(nc)
## # A tibble: 108 × 4
## mgroup subgroup region polygon
## <int> <int> <chr> <int>
## 1 1 1 1.1 1
## 2 2 1 2.1 2
## 3 3 1 3.1 3
## 4 4 1 4.1 4
## 5 4 2 4.2 5
## 6 4 3 4.3 6
## 7 5 1 5.1 7
## 8 6 1 6.1 8
## 9 7 1 7.1 9
## 10 8 1 8.1 10
## # … with 98 more rows
geometries_frame_to_df <- function(geometries_frame){
polygons_flat <- data.frame()
polygons <- geometries_frame$geometry %>% purrr::flatten() %>% purrr::flatten()
for (i in 1:length(nc_polygons)){
temp <- nc_polygons[[i]] %>%
data.frame() %>%
mutate(polygon = i)
polygons_flat = bind_rows(polygons_flat, temp)
}
polygons_flat %>%
rename(x = X1,
y = X2)
}
geometries_frame_to_df(nc) %>%
head()
## x y polygon
## 1 -81.47276 36.23436 1
## 2 -81.54084 36.27251 1
## 3 -81.56198 36.27359 1
## 4 -81.63306 36.34069 1
## 5 -81.74107 36.39178 1
## 6 -81.69828 36.47178 1
geometries_frame_to_ggplot_reference <- function(geometries_frame = nc){
keep_frame <- geometries_frame
keep_frame$geometry = NULL
keep_frame %>%
data.frame() %>%
janitor::clean_names() %>%
mutate(mgroup = row_number()) %>%
left_join(geometries_frame_to_id_group(nc)) %>%
left_join(geometries_frame_to_df(nc))
}
geometries_frame_to_id_group(nc) %>%
count(mgroup)
## # A tibble: 100 × 2
## mgroup n
## <int> <int>
## 1 1 1
## 2 2 1
## 3 3 1
## 4 4 3
## 5 5 1
## 6 6 1
## 7 7 1
## 8 8 1
## 9 9 1
## 10 10 1
## # … with 90 more rows
nc_ggplot2_reference <- geometries_frame_to_ggplot_reference(nc)
## Joining with `by = join_by(mgroup)`
## Warning in left_join(., geometries_frame_to_id_group(nc)): Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 4 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
## Joining with `by = join_by(polygon)`
## Warning in left_join(., geometries_frame_to_df(nc)): Each row in `x` is expected to match at most 1 row in `y`.
## ℹ Row 1 of `x` matches multiple rows.
## ℹ If multiple matches are expected, set `multiple = "all"` to silence this
## warning.
compute_county_nc <- function(data, scales){
data %>%
inner_join(nc_ggplot2_reference, multiple = "all")
}
nc_df %>%
rename(fips = FIPS) %>%
compute_county_nc() %>%
head()
## Joining with `by = join_by(fips)`
## AREA PERIMETER CNTY_ CNTY_ID NAME fips FIPSNO CRESS_ID BIR74 SID74 NWBIR74
## 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 2 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 3 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 4 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 5 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 6 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## BIR79 SID79 NWBIR79 group area perimeter cnty cnty_id name fipsno cress_id
## 1 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## 2 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## 3 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## 4 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## 5 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## 6 1364 0 19 1 0.114 1.442 1825 1825 Ashe 37009 5
## bir74 sid74 nwbir74 bir79 sid79 nwbir79 mgroup subgroup region polygon
## 1 1091 1 10 1364 0 19 1 1 1.1 1
## 2 1091 1 10 1364 0 19 1 1 1.1 1
## 3 1091 1 10 1364 0 19 1 1 1.1 1
## 4 1091 1 10 1364 0 19 1 1 1.1 1
## 5 1091 1 10 1364 0 19 1 1 1.1 1
## 6 1091 1 10 1364 0 19 1 1 1.1 1
## x y
## 1 -81.47276 36.23436
## 2 -81.54084 36.27251
## 3 -81.56198 36.27359
## 4 -81.63306 36.34069
## 5 -81.74107 36.39178
## 6 -81.69828 36.47178
nc_df %>%
rename(name = NAME) %>%
compute_county_nc() %>%
head()
## Joining with `by = join_by(name)`
## AREA PERIMETER CNTY_ CNTY_ID name FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74
## 1 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 2 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 3 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 4 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 5 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## 6 0.114 1.442 1825 1825 Ashe 37009 37009 5 1091 1 10
## BIR79 SID79 NWBIR79 group area perimeter cnty cnty_id fips fipsno cress_id
## 1 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## 2 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## 3 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## 4 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## 5 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## 6 1364 0 19 1 0.114 1.442 1825 1825 37009 37009 5
## bir74 sid74 nwbir74 bir79 sid79 nwbir79 mgroup subgroup region polygon
## 1 1091 1 10 1364 0 19 1 1 1.1 1
## 2 1091 1 10 1364 0 19 1 1 1.1 1
## 3 1091 1 10 1364 0 19 1 1 1.1 1
## 4 1091 1 10 1364 0 19 1 1 1.1 1
## 5 1091 1 10 1364 0 19 1 1 1.1 1
## 6 1091 1 10 1364 0 19 1 1 1.1 1
## x y
## 1 -81.47276 36.23436
## 2 -81.54084 36.27251
## 3 -81.56198 36.27359
## 4 -81.63306 36.34069
## 5 -81.74107 36.39178
## 6 -81.69828 36.47178
Step 2: pass to ggproto
StatPolygonnccounty <- ggplot2::ggproto(`_class` = "StatPolygonnccounty",
`_inherit` = ggplot2::Stat,
# required_aes = c("fips"),
# setup_data = my_setup_data,
compute_panel = compute_county_nc,
default_aes = aes(group = after_stat(polygon))
)
Step 3: write geom_* function
geom_polygon_nccounty <- function(
mapping = NULL,
data = NULL,
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatPolygonnccounty, # proto object from step 2
geom = ggplot2::GeomPolygon, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
step 4: test geom_* function, celebrate!
nc_df %>% #count(region)
mutate(FIPS = FIPS %>% as.factor()) %>%
mutate(NAME = NAME %>% as.factor()) %>%
ggplot(data = .) +
aes(name = NAME, # state indicates position instead of x and y
fill = AREA) +
geom_polygon_nccounty() +
coord_map(projection = "orthographic",
orientation = c(41, -74, 0)) +
geom_polygon_nccounty(data = . %>% filter(NAME == "Ashe"),
color = "red")
## Joining with `by = join_by(name)`
## Joining with `by = join_by(name)`