Experiment
# proportions, observed v. asserted lesson...
# proposed API
library(tidyverse)
isi_donor_url <- "https://www.isi-stats.com/isi/data/prelim/OrganDonor.txt"
donor <- read_delim(isi_donor_url) %>%
mutate(Choice = fct_rev(Choice))
donor %>% head()
donor |>
ggplot() +
aes(x = Choice) +
geom_stack() + # custom bar layer
geom_stack_label() + # label stack, also see geom_stack_label_percent()
geom_x_support() + # line under range of x
geom_x_prop() + # balancing point
geom_x_prop_label() + # balancing point (proportion) label
stamp_x_prop(prop = .5) +
stamp_x_prop_label(prop = .5) +
geom_prop_null_distribution(prop = .5)
# Proposed layer contents
library(statexpress)
library(tidyverse)
isi_donor_url <- "https://www.isi-stats.com/isi/data/prelim/OrganDonor.txt"
donor <- read_delim(isi_donor_url) %>%
mutate(Choice = fct_rev(Choice))
## Rows: 161 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Default, Choice
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 1. layer stack of bricks
compute_group_bricks <- function(data, scales, width = .2){
data %>%
mutate(row = row_number()) %>%
mutate(y = row - .5) %>%
mutate(width = width)
}
# 2. layer label stack with count
compute_group_count <- function(data, scales){
data %>%
count(x) %>%
mutate(y = n,
label = n)
}
# 3. layer add x span
compute_scale <- function(data, scales){
data %>%
summarise(min_x = min(x),
xend = max(x),
y = 0,
yend = 0) %>%
rename(x = min_x)
}
# 4. layer add balancing point
compute_xmean_at_y0 <- function(data, scales){
data %>%
summarise(x = mean(x),
y = 0,
label = "^")
}
# 5. layer add balancing point value label
compute_xmean_at_y0_label <- function(data, scales){
data %>%
summarise(x = mean(x),
y = 0,
label = after_stat(round(x - 1, 2)))
}
# 6.
compute_panel_prop_asserted <- function(data, scales, null = .5){
# stamp type layer - so ignor input data
data.frame(y = 0,
x = null + 1,
label = "^"
)
}
compute_panel_prop_asserted_label <- function(data, scales, null = .5){
# stamp type layer - so ignor input data
data.frame(y = 0,
x = null + 1,
label = round(null, 2)
)
}
# Proposed layer composition
compute_dnorm_prop <- function(data, scales, null = .5, dist_sds = seq(-3.5, 3.5, by = .1)
){
n <- nrow(data)
sd = sqrt(null * (1 - null)/n) # sd of the null distribution
q <- dist_sds * sd + null
data.frame(x = q + 1) %>%
mutate(height = dnorm(q, sd = sd, mean = null)) %>%
mutate(height_max = dnorm(0, sd = sd, mean = 0)) %>%
mutate(y = .35*n*height/height_max) %>% # This is a bit fragile...
mutate(xend = x,
yend = 0) %>%
# @teunbrand GeomArea$setup_data() requires a group column. Your panel computation does not preserve groups, but it should.
mutate(group = 1)
}
# Proposed layer composition
compute_dnorm_prop_sds <- function(data, scales, null = .5,
dist_sds = -4:4){
n <- nrow(data)
sd = sqrt(null * (1 - null)/n) # sd of the null distribution
q <- dist_sds * sd + null
data.frame(x = q + 1) %>%
mutate(height = dnorm(q, sd = sd, mean = null)) %>%
mutate(height_max = dnorm(0, sd = sd, mean = 0)) %>%
mutate(y = .35*n*height/height_max) %>% # This is a bit fragile...
mutate(xend = x,
yend = 0)
}
donor |>
ggplot() +
aes(x = Choice) +
# 1. geom_stack() -- show counts in a count-y way (i.e. bricks!),
# with a good amount of space in-between stacks
qlayer(geom = GeomTile,
stat = qstat(compute_group_bricks)) +
# 2. geom_stack_label() -- label stacks
qlayer(geom = GeomText,
stat = qstat(compute_group_count)) +
# 3. geom_xrange, show scale, range at y is zero
qlayer(geom = GeomSegment,
stat = qstat_panel(compute_scale)) +
# 4. geom_prop, show prop, i.e. balancing point
qlayer(geom = GeomText,
stat = qstat_panel(compute_xmean_at_y0)) +
# 5. geom_prop_label, labeling prop, balancing point
qlayer(geom = GeomLabel,
stat = qstat_panel(compute_xmean_at_y0_label)) + # by
# 6. stamp_prop, assertion, point
qlayer(geom = GeomText,
stat = qstat_panel(compute_panel_prop_asserted)) +
# 7. stamp_prop_label, assertion, label
qlayer(geom = GeomLabel,
stat = qstat_panel(compute_panel_prop_asserted_label)) +
# 8. geom_norm on prop plot
qlayer(geom = GeomArea,
stat = qstat_panel(compute_dnorm_prop)) +
# 9. geom_prop_norm w/ sd marks
qlayer(geom = GeomSegment,
stat = qstat_panel(compute_dnorm_prop_sds)) +
labs(title = "Is there statistical evidence that choice to between being an\nan organ donar or not differs from 50/50")

donor |>
ggplot() +
aes(x = Choice) +
# 1. geom_stack() -- show counts in a count-y way (i.e. bricks!),
# with a good amount of space in-between stacks
qlayer(geom = qproto_update(GeomTile, aes(color = "white")),
stat = qstat(compute_group_bricks)) +
# 2. geom_stack_label() -- label stacks
qlayer(geom = qproto_update(GeomText, aes(vjust = 0)),
stat = qstat(compute_group_count)) +
# 3. geom_xrange, show scale, range at y is zero
qlayer(geom = GeomSegment,
stat = qstat_panel(compute_scale)) +
# 4. geom_prop, show prop, i.e. balancing point
qlayer(geom = qproto_update(GeomText, aes(size = 6, vjust = 1)),
stat = qstat_panel(compute_xmean_at_y0)) +
# 5. geom_prop_label, labeling prop, balancing point
qlayer(geom = qproto_update(GeomLabel, aes(fill = NA, label.size = NA, vjust = 0)),
stat = qstat_panel(compute_xmean_at_y0_label)) + # by
# 6. stamp_prop, assertion, point
qlayer(geom = qproto_update(GeomText, aes(size = 6, vjust = 1, color = "red")),
stat = qstat_panel(compute_panel_prop_asserted)) +
# 7. stamp_prop_label, assertion, label
qlayer(geom = qproto_update(GeomLabel, aes(fill = NA, label.size = NA, vjust = 0, color = "red")),
stat = qstat_panel(compute_panel_prop_asserted_label)) +
# 8. geom_norm on prop plot
qlayer(geom = qproto_update(GeomArea, aes(alpha = .2)),
stat = qstat(compute_dnorm_prop),
mapping = aes(x = 1, y = 1)) +
# 9. geom_prop_norm w/ sd marks
qlayer(geom = qproto_update(GeomSegment, aes(linetype = "dotted")),
stat = qstat(compute_dnorm_prop),
mapping = aes(x = 1, y = 1),
dist_sds = -3:3) +
labs(title = "Is there statistical evidence that choice to between being an\nan organ donar or not differs from 50/50")
## Warning in qlayer(geom = qproto_update(GeomArea, aes(alpha = 0.2)), stat = qstat(compute_dnorm_prop), : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in qlayer(geom = qproto_update(GeomSegment, aes(linetype = "dotted")), : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
