# Convert the passenger data to a tibble. `as_tibble()` is the supported
# replacement for `as.tibble()`, which was deprecated in tibble 2.0.0 and
# raised a lifecycle warning at this step.
passengers <- passengers %>%
  as_tibble()
# Histogram of passenger ages using the default binning.
ggplot(data = passengers, mapping = aes(x = age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 557 rows containing non-finite values (`stat_bin()`).
# Survival against age; points are jittered vertically only (width = 0).
ggplot(data = passengers, mapping = aes(x = age, y = survived)) +
  geom_jitter(width = 0)
## Warning: Removed 557 rows containing missing values (`geom_point()`).
# Tally passengers by adulthood status. Rows with a missing age match
# neither clause and are counted under NA.
passengers %>%
  count(
    adult = case_when(
      age < 18 ~ FALSE,
      age >= 18 ~ TRUE
    )
  )
## # A tibble: 3 × 2
## adult n
## <lgl> <int>
## 1 FALSE 96
## 2 TRUE 660
## 3 NA 557
# Same tally, with the fall-through to NA written out as a final clause.
passengers %>%
  count(
    adult = case_when(
      age < 18 ~ FALSE,
      age >= 18 ~ TRUE,
      TRUE ~ NA
    )
  )
## # A tibble: 3 × 2
## adult n
## <lgl> <int>
## 1 FALSE 96
## 2 TRUE 660
## 3 NA 557
# When the data defies our expectations
# Suppose we believe that adulthood ends before age 70
# Flag passengers as adult (TRUE) or not (FALSE); a missing age stays NA.
# Rows that match no clause (age exactly 18, or 70 and over) also fall
# through to case_when()'s default of NA, so in the count below they are
# indistinguishable from genuinely missing ages.
passengers %>%
  mutate(
    adult = case_when(
      age > 18 & age < 70 ~ TRUE,
      age < 18 ~ FALSE,
      # Test the age column itself: `is.na(NA)` is always TRUE and acted as
      # an accidental catch-all rather than a missing-age check.
      is.na(age) ~ NA
    )
  ) %>%
  count(adult)
## # A tibble: 3 × 2
## adult n
## <lgl> <int>
## 1 FALSE 96
## 2 TRUE 626
## 3 NA 591
# Label each passenger's age group as text. An age that fits none of the
# expected cases (for example a fractional age below 18, or 70 and over)
# receives an explicit sentinel string rather than a silent NA.
mutate(
  passengers,
  adult = case_when(
    age %in% 0:17 ~ "child",
    age >= 18 & age < 70 ~ "adult",
    is.na(age) ~ NA,
    TRUE ~ "Throw an error, unanticipated case"
  )
)
## # A tibble: 1,313 × 6
## name class age sex survi…¹ adult
## <chr> <chr> <dbl> <chr> <int> <chr>
## 1 Allen, Miss Elisabeth Walton 1st 29 fema… 1 adult
## 2 Allison, Miss Helen Loraine 1st 2 fema… 0 child
## 3 Allison, Mr Hudson Joshua Creighton 1st 30 male 0 adult
## 4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25 fema… 0 adult
## 5 Allison, Master Hudson Trevor 1st 0.92 male 1 Thro…
## 6 Anderson, Mr Harry 1st 47 male 1 adult
## 7 Andrews, Miss Kornelia Theodosia 1st 63 fema… 1 adult
## 8 Andrews, Mr Thomas, jr 1st 39 male 0 adult
## 9 Appleton, Mrs Edward Dale (Charlotte Lamson) 1st 58 fema… 1 adult
## 10 Artagaveytia, Mr Ramon 1st 71 male 0 Thro…
## # … with 1,303 more rows, and abbreviated variable name ¹survived
# mixing variable types does not work
# Same labelling, but the fallback is supplied through `.default` and
# embeds the offending age in the message. Only the first rows are shown.
head(
  mutate(
    passengers,
    adult = case_when(
      age < 18 ~ "child",
      age > 18 & age < 70 ~ "adult",
      is.na(age) ~ NA,
      .default = paste("Warning! Unanticipated case detected: ", age)
    )
  )
)
## # A tibble: 6 × 6
## name class age sex survi…¹ adult
## <chr> <chr> <dbl> <chr> <int> <chr>
## 1 Allen, Miss Elisabeth Walton 1st 29 female 1 adult
## 2 Allison, Miss Helen Loraine 1st 2 female 0 child
## 3 Allison, Mr Hudson Joshua Creighton 1st 30 male 0 adult
## 4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25 female 0 adult
## 5 Allison, Master Hudson Trevor 1st 0.92 male 1 child
## 6 Anderson, Mr Harry 1st 47 male 1 adult
## # … with abbreviated variable name ¹survived
# Experiment: can ifelse() raise a warning only when the unexpected branch
# is taken? With a true test the `no` argument is not evaluated, so no
# warning is raised.
ifelse(1, "yes", warning())
## [1] "yes"
# With a false test the `no` argument is evaluated: the warning fires and
# its message text becomes the returned value.
ifelse(0, "yes", warning("hello"))
## Warning in ifelse(0, "yes", warning("hello")): hello
## [1] "hello"
# Commented-out variant using errorCondition() in place of warning():
# ifelse(0, "yes", errorCondition(message = "hi", ))
# Label age groups as text; anything outside the expected cases is tagged
# with a fixed sentinel string.
mutate(
  passengers,
  adult = case_when(
    age < 18 ~ "child",
    age > 18 & age < 70 ~ "adult",
    is.na(age) ~ NA,
    TRUE ~ "Warn!, unanticipated case"
  )
)
## # A tibble: 1,313 × 6
## name class age sex survi…¹ adult
## <chr> <chr> <dbl> <chr> <int> <chr>
## 1 Allen, Miss Elisabeth Walton 1st 29 fema… 1 adult
## 2 Allison, Miss Helen Loraine 1st 2 fema… 0 child
## 3 Allison, Mr Hudson Joshua Creighton 1st 30 male 0 adult
## 4 Allison, Mrs Hudson JC (Bessie Waldo Daniels) 1st 25 fema… 0 adult
## 5 Allison, Master Hudson Trevor 1st 0.92 male 1 child
## 6 Anderson, Mr Harry 1st 47 male 1 adult
## 7 Andrews, Miss Kornelia Theodosia 1st 63 fema… 1 adult
## 8 Andrews, Mr Thomas, jr 1st 39 male 0 adult
## 9 Appleton, Mrs Edward Dale (Charlotte Lamson) 1st 58 fema… 1 adult
## 10 Artagaveytia, Mr Ramon 1st 71 male 0 Warn…
## # … with 1,303 more rows, and abbreviated variable name ¹survived
#' case_when() that labels unmatched values instead of returning NA
#'
#' Forwards the formulas in `...` to `case_when()` and supplies a `.default`
#' that spells out the offending value, so cases the caller did not
#' anticipate show up as readable text in the result.
#'
#' @param var Vector whose values the conditions test. Both the expression
#'   supplied here (for example a column name) and its values are pasted
#'   into the fallback message.
#' @param ... Two-sided formulas passed on to `case_when()` unchanged.
#' @return The result of `case_when()`; elements matched by no formula hold
#'   "Warning! Unanticipated value in '<var expression>': <value>".
case_when_strict <- function(var, ...) {
  # Collapse the deparsed expression into a single string: deparse() splits
  # long expressions across several elements, which would otherwise be
  # recycled against the values and garble the messages.
  var_label <- paste(trimws(deparse(substitute(var))), collapse = " ")

  case_when(
    ...,
    # `var` is an ordinary vector here, so it is used directly; the embrace
    # operator has no special meaning outside a tidy-eval context.
    .default = paste0(
      "Warning! Unanticipated value in '", var_label, "': ", var
    )
  )
}
# Recode age with the strict helper so any value outside the listed cases is
# labelled in the new `adult` column, and keep the result for inspection.
cleaned_data <- mutate(
  passengers,
  adult = case_when_strict(
    var = age,
    age < 18 ~ "child",
    age >= 18 & age < 70 ~ "adult",
    is.na(age) ~ NA
  )
)
#' Summarise "Unanticipated value" messages found anywhere in a data set
#'
#' @param data A data frame (or anything `as.matrix()` accepts) to scan.
#' @return A tibble with one row per distinct message and its count `n`, or
#'   the string "No unanticipated value messages detected" when none exist.
check_for_unanticipated_cats <- function(data) {
  # Flatten every cell of the data into one vector so all columns are
  # searched at once.
  cell_values <- as.vector(as.matrix(data))
  is_flagged <- str_detect(cell_values, "Unanticipated value")

  # str_detect() gives NA for missing cells, which index as NA entries here;
  # those are dropped again after counting.
  flagged <- tibble(message = cell_values[is_flagged])
  messages_summary <- filter(count(flagged, message), !is.na(message))

  if (nrow(messages_summary) > 0) {
    messages_summary
  } else {
    "No unanticipated value messages detected"
  }
}
# Report any fallback messages left in the recoded data.
check_for_unanticipated_cats(cleaned_data)
## # A tibble: 2 × 2
## message n
## <chr> <int>
## 1 Warning! Unanticipated value in 'age': 70 1
## 2 Warning! Unanticipated value in 'age': 71 3
# Recode again without an upper age limit, so every non-missing age is
# covered, then confirm that no fallback messages remain.
cleaned_data <- mutate(
  passengers,
  adult = case_when_strict(
    var = age,
    age < 18 ~ "child",
    age >= 18 ~ "adult",
    is.na(age) ~ NA
  )
)
check_for_unanticipated_cats(cleaned_data)
## [1] "No unanticipated value messages detected"