Intro Thoughts

Status Quo

library(tidyverse)

Experiment

library(tidyverse)

compute_panel_pca_rows <- function(data, scales){
  
  data %>%  remove_missing() ->
    data

  data %>% 
    select(outcome, a, b, c) %>% # as proof of concept 3 predictors a b c
    ordr::ordinate(cols = 2:4, model = ~ prcomp(., scale. = TRUE)) %>% 
  .$x %>% 
    as.data.frame() %>% 
    bind_cols(data["outcome"])
    
}

palmerpenguins::penguins %>% 
  rename(outcome = species, a = bill_length_mm, b = bill_depth_mm, c = flipper_length_mm) %>%
  compute_panel_pca_rows() %>% 
  head()
## Warning: Removed 11 rows containing missing values or values outside the scale
## range.
##          PC1         PC2        PC3 outcome
## 1 -1.8312137  0.05018344  0.2860721  Adelie
## 2 -1.2184054  0.48737657  0.3278652  Adelie
## 3 -0.8697485  0.13788110 -0.2039821  Adelie
## 4 -1.6639864  0.07602012 -0.7088374  Adelie
## 5 -1.8801939 -0.72572749 -0.5742510  Adelie
## 6 -1.6179709  0.41909356  0.4553261  Adelie
StatPcarows <- ggproto(`_class` = "StatPcarows",
                       `_inherit` = Stat,
                       compute_panel = compute_panel_pca_rows,
                       default_aes = aes(x = after_stat(PC1), 
                                         y = after_stat(PC2), 
                                         color = after_stat(outcome)))

compute_panel_pca_col <- function(data, scales){
  
  data %>%  remove_missing() ->
    data

  data %>% 
    select(outcome, a, b, c) %>% # as proof of concept 3 predictors a b c
    ordr::ordinate(cols = 2:4, model = ~ prcomp(., scale. = TRUE)) %>% 
    .$rotation %>% 
    as.data.frame() %>% 
    rownames_to_column()
    
}

palmerpenguins::penguins %>% 
  rename(outcome = species, a = bill_length_mm, b = bill_depth_mm, c = flipper_length_mm) %>%
  compute_panel_pca_col()
## Warning: Removed 11 rows containing missing values or values outside the scale
## range.
##   rowname        PC1         PC2        PC3
## 1       a  0.5513631 -0.65493615  0.5167759
## 2       b -0.5107043 -0.75478128 -0.4116872
## 3       c  0.6596816 -0.03693055 -0.7506373
StatPcacols <- ggproto(`_class` = "StatPcacols", Stat,
                      compute_panel = compute_panel_pca_col, 
                      default_aes = aes(x = after_stat(PC1), 
                                        y = after_stat(PC2), 
                                        xend = after_stat(0),
                                        yend = after_stat(0), 
                                        label = paste("Variable",
                                                      after_stat(rowname))))


palmerpenguins::penguins %>% 
  ggplot() + 
  aes(outcome = species, a = bill_length_mm, 
      b = bill_depth_mm, c = flipper_length_mm) +
  geom_point(stat = StatPcarows) + 
  geom_text(stat = StatPcacols, hjust = "outward") + 
  geom_segment(stat = StatPcacols, 
                arrow = arrow(ends = "first"))
## Warning: Removed 2 rows containing missing values or values outside the scale
## range.
## Warning: Removed 2 rows containing missing values or values outside the scale range.
## Removed 2 rows containing missing values or values outside the scale range.

Closing remarks, Other Relevant Work, Caveats