Step 1. Visualize the difference in means

library(tidyverse)
library(ggt.test)

# Print the whole data set. The printed output that follows shows two columns,
# `year` and `time`, with one row per observation.
data_old_faithful
##     year time
## 1   1978   78
## 2   1978   74
## 3   1978   68
## 4   1978   76
## 5   1978   80
## 6   1978   84
## 7   1978   50
## 8   1978   93
## 9   1978   55
## 10  1978   76
## 11  1978   58
## 12  1978   74
## 13  1978   75
## 14  1978   80
## 15  1978   56
## 16  1978   80
## 17  1978   69
## 18  1978   57
## 19  1978   90
## 20  1978   42
## 21  1978   91
## 22  1978   51
## 23  1978   79
## 24  1978   53
## 25  1978   82
## 26  1978   51
## 27  1978   76
## 28  1978   82
## 29  1978   84
## 30  1978   53
## 31  1978   86
## 32  1978   51
## 33  1978   85
## 34  1978   45
## 35  1978   88
## 36  1978   51
## 37  1978   80
## 38  1978   49
## 39  1978   82
## 40  1978   75
## 41  1978   73
## 42  1978   67
## 43  1978   68
## 44  1978   86
## 45  1978   72
## 46  1978   75
## 47  1978   75
## 48  1978   66
## 49  1978   84
## 50  1978   70
## 51  1978   79
## 52  1978   60
## 53  1978   86
## 54  1978   71
## 55  1978   67
## 56  1978   81
## 57  1978   76
## 58  1978   83
## 59  1978   76
## 60  1978   55
## 61  1978   73
## 62  1978   56
## 63  1978   83
## 64  1978   57
## 65  1978   71
## 66  1978   72
## 67  1978   77
## 68  1978   55
## 69  1978   75
## 70  1978   73
## 71  1978   70
## 72  1978   83
## 73  1978   50
## 74  1978   95
## 75  1978   51
## 76  1978   82
## 77  1978   54
## 78  1978   83
## 79  1978   51
## 80  1978   80
## 81  1978   78
## 82  1978   81
## 83  1978   53
## 84  1978   89
## 85  1978   44
## 86  1978   78
## 87  1978   61
## 88  1978   73
## 89  1978   75
## 90  1978   73
## 91  1978   76
## 92  1978   55
## 93  1978   86
## 94  1978   48
## 95  1978   77
## 96  1978   73
## 97  1978   70
## 98  1978   88
## 99  1978   75
## 100 1978   83
## 101 1978   61
## 102 1978   78
## 103 1978   61
## 104 1978   81
## 105 1978   51
## 106 1978   80
## 107 1978   79
## 108 2003   91
## 109 2003   86
## 110 2003   86
## 111 2003   88
## 112 2003   86
## 113 2003   71
## 114 2003  102
## 115 2003   92
## 116 2003   99
## 117 2003   99
## 118 2003   79
## 119 2003  106
## 120 2003   58
## 121 2003   86
## 122 2003   87
## 123 2003   91
## 124 2003   98
## 125 2003   83
## 126 2003   99
## 127 2003   87
## 128 2003   93
## 129 2003   77
## 130 2003   85
## 131 2003   96
## 132 2003   91
## 133 2003   56
## 134 2003   92
## 135 2003   90
## 136 2003   87
## 137 2003   88
## 138 2003   95
## 139 2003   88
## 140 2003   99
## 141 2003   99
## 142 2003   89
## 143 2003  100
## 144 2003   93
## 145 2003   87
## 146 2003  100
## 147 2003   98
## 148 2003   95
## 149 2003   90
## 150 2003  107
## 151 2003   90
## 152 2003   96
## 153 2003  107
## 154 2003   88
## 155 2003  100
## 156 2003   90
## 157 2003   87
## 158 2003   93
## 159 2003   94
## 160 2003   87
## 161 2003   98
## 162 2003   96
## 163 2003   88
## 164 2003   96
## 165 2003   90
## 166 2003  101
## 167 2003   96
## 168 2003   86
## 169 2003   92
## 170 2003   86
## 171 2003   95
## 172 2003   85
## 173 2003   84
## 174 2003   93
## 175 2003   82
## 176 2003   92
## 177 2003   87
## 178 2003   87
## 179 2003   89
## 180 2003   99
## 181 2003   89
## 182 2003   87
## 183 2003   91
## 184 2003   94
## 185 2003   87
## 186 2003  102
## 187 2003  103
## 188 2003   98
## 189 2003   98
## 190 2003   84
## 191 2003   94
## 192 2003   95
## 193 2003   82
## 194 2003   89
## 195 2003  110
## 196 2003   90
## 197 2003   92
## 198 2003   87
## 199 2003   85
## 200 2003   99
## 201 2003   99
## 202 2003   85

‘Interlude’ - What if there were no relationship between the variables? Shuffling one variable simulates that. Is the observed difference consistent with no relationship?

In 10 shuffles, how many times is the difference smaller than the observed difference of 32.21…?

# Interlude plot: shuffle `time` so that it carries no association with `year`,
# then plot the shuffled values faceted by year.
# NOTE(review): the layer helpers (geom_stacks, geom_mean, geom_mean_diff,
# geom_mean_diff_label, facet_align) presumably come from ggt.test and draw
# stacked observations, per-group means, and the difference in means — confirm
# against the package docs.
# No seed is set, so each run is expected to give a different shuffle.
data_old_faithful |>
  data_shuffle_var(var = time) |>  # shuffle - no association between vars
  ggplot() +
  aes(x = shuffled) + #<< shuffled values
  geom_stacks() +
  geom_mean() +
  geom_mean_diff() +  # added once; a repeated identical layer only overplots
  facet_align(year) +
  geom_mean_diff_label()


# last_plot() + (data_haircuts |> data_shuffle_var(var = price))
# 
# last_plot() + (data_haircuts |> data_shuffle_var(var = price))
# 
# last_plot() + (data_haircuts |> data_shuffle_var(var = price))
# 
# last_plot() + (data_haircuts |> data_shuffle_var(var = price))
# 
# last_plot() + (data_haircuts |> data_shuffle_var(var = price))

Step 2. Visualize distribution for null (and standard error)

# Number of observations recorded in each year (group sizes).
count(data_old_faithful, year)
##   year   n
## 1 1978 107
## 2 2003  95
# Standard deviation of `time` within each year.
group_by(data_old_faithful, year) |>
  summarise(sd = sd(time))
## # A tibble: 2 × 2
##    year    sd
##   <int> <dbl>
## 1  1978 13.0 
## 2  2003  8.46