# Convert `passengers` to a tibble. `as_tibble()` is the supported replacement
# for `as.tibble()`, which has been deprecated since tibble 2.0.0.
passengers <- passengers %>%
  as_tibble()
# Distribution of passenger ages (default binning).
ggplot(passengers, aes(x = age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 557 rows containing non-finite values (`stat_bin()`).

# Survival against age; points are jittered vertically only (width = 0).
ggplot(passengers, aes(x = age, y = survived)) +
  geom_jitter(width = 0)
## Warning: Removed 557 rows containing missing values (`geom_point()`).

# Count adults vs. children; rows matching neither condition are left as NA.
count(
  passengers,
  adult = case_when(
    age >= 18 ~ TRUE,
    age < 18 ~ FALSE
  )
)
## # A tibble: 3 × 2
##   adult     n
##   <lgl> <int>
## 1 FALSE    96
## 2 TRUE    660
## 3 NA      557
# Same classification, but with the NA fallback spelled out explicitly.
mutate(
  passengers,
  adult = case_when(
    age >= 18 ~ TRUE,
    age < 18 ~ FALSE,
    .default = NA
  )
) %>%
  count(adult)
## # A tibble: 3 × 2
##   adult     n
##   <lgl> <int>
## 1 FALSE    96
## 2 TRUE    660
## 3 NA      557
# When the data defies our expectations
# Suppose we believe that adulthood ends before age 70
# Classify with an upper age bound; anything no rule matches silently becomes NA.
# The missing-value rule tests `age` itself (`is.na(NA)` is always TRUE and
# never looks at the data).
# NOTE(review): age == 18 matches neither of the first two rules and therefore
# ends up NA as well -- confirm whether `age >= 18` was intended.
passengers %>%
  mutate(adult = case_when(age > 18 & age < 70 ~ TRUE,
                           age < 18 ~ FALSE,
                           is.na(age) ~ NA)) %>%
  count(adult)
## # A tibble: 3 × 2
##   adult     n
##   <lgl> <int>
## 1 FALSE    96
## 2 TRUE    626
## 3 NA      591
# Label each passenger and mark any age that none of the rules anticipates.
mutate(
  passengers,
  adult = case_when(
    age >= 18 & age < 70 ~ "adult",
    age %in% 0:17 ~ "child",
    is.na(age) ~ NA,
    .default = "Throw an error, unanticipated case"
  )
)
## # A tibble: 1,313 × 6
##    name                                          class   age sex   survi…¹ adult
##    <chr>                                         <chr> <dbl> <chr>   <int> <chr>
##  1 Allen, Miss Elisabeth Walton                  1st   29    fema…       1 adult
##  2 Allison, Miss Helen Loraine                   1st    2    fema…       0 child
##  3 Allison, Mr Hudson Joshua Creighton           1st   30    male        0 adult
##  4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st   25    fema…       0 adult
##  5 Allison, Master Hudson Trevor                 1st    0.92 male        1 Thro…
##  6 Anderson, Mr Harry                            1st   47    male        1 adult
##  7 Andrews, Miss Kornelia Theodosia              1st   63    fema…       1 adult
##  8 Andrews, Mr Thomas, jr                        1st   39    male        0 adult
##  9 Appleton, Mrs Edward Dale (Charlotte Lamson)  1st   58    fema…       1 adult
## 10 Artagaveytia, Mr Ramon                        1st   71    male        0 Thro…
## # … with 1,303 more rows, and abbreviated variable name ¹​survived
# mixing variable types does not work
# Use `.default` to build a message that carries the offending age value.
mutate(
  passengers,
  adult = case_when(
    age > 18 & age < 70 ~ "adult",
    age < 18 ~ "child",
    is.na(age) ~ NA,
    .default = paste("Warning! Unanticipated case detected: ", age)
  )
) %>%
  head()
## # A tibble: 6 × 6
##   name                                          class   age sex    survi…¹ adult
##   <chr>                                         <chr> <dbl> <chr>    <int> <chr>
## 1 Allen, Miss Elisabeth Walton                  1st   29    female       1 adult
## 2 Allison, Miss Helen Loraine                   1st    2    female       0 child
## 3 Allison, Mr Hudson Joshua Creighton           1st   30    male         0 adult
## 4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st   25    female       0 adult
## 5 Allison, Master Hudson Trevor                 1st    0.92 male         1 child
## 6 Anderson, Mr Harry                            1st   47    male         1 adult
## # … with abbreviated variable name ¹​survived
# Experiment: can a warning be raised from the fallback branch of `ifelse()`?
# With a true test the `no` argument is never evaluated, so `warning()` is not
# called and no warning appears.
ifelse(1, "yes", warning())
## [1] "yes"
# With a false test the `no` argument is evaluated: the warning is signalled,
# and because `warning()` returns its message as a character string, that
# message becomes the value of the `ifelse()` call.
ifelse(0, "yes", warning("hello"))
## Warning in ifelse(0, "yes", warning("hello")): hello
## [1] "hello"
# Disabled attempt using a condition object instead of `warning()`.
# ifelse(0, "yes", errorCondition(message = "hi", ))


# Label each passenger; ages that no rule covers get a fixed warning string.
mutate(
  passengers,
  adult = case_when(
    age > 18 & age < 70 ~ "adult",
    age < 18 ~ "child",
    is.na(age) ~ NA,
    .default = "Warn!, unanticipated case"
  )
)
## # A tibble: 1,313 × 6
##    name                                          class   age sex   survi…¹ adult
##    <chr>                                         <chr> <dbl> <chr>   <int> <chr>
##  1 Allen, Miss Elisabeth Walton                  1st   29    fema…       1 adult
##  2 Allison, Miss Helen Loraine                   1st    2    fema…       0 child
##  3 Allison, Mr Hudson Joshua Creighton           1st   30    male        0 adult
##  4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st   25    fema…       0 adult
##  5 Allison, Master Hudson Trevor                 1st    0.92 male        1 child
##  6 Anderson, Mr Harry                            1st   47    male        1 adult
##  7 Andrews, Miss Kornelia Theodosia              1st   63    fema…       1 adult
##  8 Andrews, Mr Thomas, jr                        1st   39    male        0 adult
##  9 Appleton, Mrs Edward Dale (Charlotte Lamson)  1st   58    fema…       1 adult
## 10 Artagaveytia, Mr Ramon                        1st   71    male        0 Warn…
## # … with 1,303 more rows, and abbreviated variable name ¹​survived
#' `case_when()` that flags unmatched values instead of silently returning NA
#'
#' Forwards the formulas in `...` to `case_when()` and supplies a `.default`
#' that embeds both the name of the recoded variable and the offending value,
#' so unanticipated cases show up as
#' `"Warning! Unanticipated value in '<var>': <value>"`.
#'
#' @param var The vector being recoded. Its expression (as written by the
#'   caller) is used as the label in the message and its values are appended
#'   to it. Because it precedes `...`, callers pass it by name (`var = age`).
#' @param ... Two-sided formulas passed on unchanged to `case_when()`.
#' @return The result of `case_when()` with the warning strings as fallback.
case_when_strict <- function(var, ...) {
  # Collapse so that an expression which deparses to several lines still
  # yields a single label rather than a vector recycled across rows.
  var_label <- paste(deparse(substitute(var)), collapse = " ")

  # `var` is an ordinary argument here (no data masking takes place inside
  # this function), so it is referenced directly rather than embraced.
  case_when(
    ...,
    .default = paste0(
      "Warning! Unanticipated value in '", var_label, "': ", var
    )
  )
}



# Recode age with the strict helper; ages of 70 and over are not covered by
# any rule here and are therefore flagged.
cleaned_data <- passengers %>%
  mutate(
    adult = case_when_strict(
      var = age,
      age >= 18 & age < 70 ~ "adult",
      age < 18 ~ "child",
      is.na(age) ~ NA
    )
  )

#' Summarise "Unanticipated value" messages found anywhere in a data frame
#'
#' Flattens every cell of `data` into one vector, keeps the cells containing
#' the text "Unanticipated value", and tallies each distinct message.
#'
#' @param data A data frame (or tibble) to scan.
#' @return A tibble with columns `message` and `n` when at least one message
#'   is found; otherwise the string
#'   "No unanticipated value messages detected".
check_for_unanticipated_cats <- function(data) {
  # Flatten all columns into a single vector via a matrix.
  cell_values <- as.vector(as.matrix(data))

  # Missing cells give NA here, which subsetting turns into NA entries;
  # those are dropped again after counting.
  is_flagged <- str_detect(cell_values, "Unanticipated value")

  messages_summary <- tibble(message = cell_values[is_flagged]) %>%
    count(message) %>%
    filter(!is.na(message))

  if (nrow(messages_summary) > 0) {
    messages_summary
  } else {
    "No unanticipated value messages detected"
  }
}

# Report which unanticipated values were flagged during recoding.
cleaned_data %>%
  check_for_unanticipated_cats()
## # A tibble: 2 × 2
##   message                                       n
##   <chr>                                     <int>
## 1 Warning! Unanticipated value in 'age': 70     1
## 2 Warning! Unanticipated value in 'age': 71     3
# Recode again without an upper age bound, then re-run the check.
cleaned_data <- passengers %>%
  mutate(
    adult = case_when_strict(
      var = age,
      age >= 18 ~ "adult",
      age < 18 ~ "child",
      is.na(age) ~ NA
    )
  )

cleaned_data %>%
  check_for_unanticipated_cats()
## [1] "No unanticipated value messages detected"