
Adam J Sullivan
Assistant Professor of Biostatistics
Brown University
"Success" or
"Failure." These successes need not be positive outcomes, as the word "success" implies; a success is simply any outcome that a researcher is interested in. success
or failure
.success
is \(p\) and the probability of a failure
is \(1-p\). We know that \(X\sim \text{Bin}(n,p)\) where \(n=5\). Then we have the following pmf: \[p(k)= \binom{5}{k}p^k(1-p)^{5-k}\;\;\;\;\;\;\;\; k=0,1,2,3,4,5\] With \(p=0.3\), these probabilities give us the following table:
\(k\) | \(\Pr(k)\) |
---|---|
0 | 0.16807 |
1 | 0.36015 |
2 | 0.30870 |
3 | 0.13230 |
4 | 0.02835 |
5 | 0.00243 |
\[f(x) = \dfrac{1}{\sigma\sqrt{2\pi}}e^{-(x-\mu)^2/(2\sigma^2)}, \;\;\;\;\;\;\;\; -\infty < x < \infty\]
fivethirtyeight
package:
library(fivethirtyeight)
?comic_characters
library(fivethirtyeight)
library(tidyverse)
# Number of characters recorded for each publisher (column `n`).
cnt <- comic_characters %>%
  count(publisher)

# Mean number of appearances per publisher; characters with a missing
# appearance count are ignored rather than turning the mean into NA.
mn <- comic_characters %>%
  group_by(publisher) %>%
  summarise(mean_app = mean(appearances, na.rm = TRUE))

# Combine counts and means into one table. The key is named explicitly so
# the join does not depend on (or print a message about) guessed columns.
full_join(cnt, mn, by = "publisher")
## # A tibble: 2 x 3
## publisher n mean_app
## <chr> <int> <dbl>
## 1 DC 6896 23.6
## 2 Marvel 16376 17.0
# Welch two-sample t-test (t.test()'s default, no equal-variance assumption)
# comparing mean appearances between the two publishers.
t.test(appearances ~ publisher, data = comic_characters)
##
## Welch Two Sample t-test
##
## data: appearances by publisher
## t = 4.9476, df = 13552, p-value = 7.605e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.980214 9.203299
## sample estimates:
## mean in group DC mean in group Marvel
## 23.62513 17.03338
library(broom)
# One-way ANOVA of appearances on publisher, printed as a tidy tibble.
aov(appearances ~ publisher, data = comic_characters) %>%
  tidy()
## # A tibble: 2 x 6
## term df sumsq meansq statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 publisher 1 199019. 199019. 22.6 0.00000197
## 2 Residuals 21819 191840416. 8792. NA NA
# Classical (pooled-variance) two-sample t-test: same comparison as the
# Welch test, but assuming both publishers share a common variance.
t.test(
  appearances ~ publisher,
  data = comic_characters,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: appearances by publisher
## t = 4.7577, df = 21819, p-value = 1.971e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.876078 9.307436
## sample estimates:
## mean in group DC mean in group Marvel
## 23.62513 17.03338
# Linear regression of appearances on publisher.
model <- lm(appearances ~ publisher, data = comic_characters)

# Coefficient table (estimate, standard error, t statistic, p-value).
tidy(model)

# One-row summary of overall fit (R-squared, F statistic, AIC, ...).
glance(model)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 23.6 1.16 20.4 1.89e-91
## 2 publisherMarvel -6.59 1.39 -4.76 1.97e- 6
## # A tibble: 1 x 11
## r.squared adj.r.squared sigma statistic p.value df logLik AIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl>
## 1 0.00104 0.000991 93.8 22.6 1.97e-6 2 -1.30e5 2.60e5
## # ... with 3 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>
# Regress appearances on the sex category, using the default level
# ordering of `sex`, and print the coefficient table.
mod <- lm(appearances ~ sex, data = comic_characters)
tidy(mod)
## # A tibble: 6 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 19.7 14.8 1.33 0.182
## 2 sexFemale Characters 1.37 14.8 0.0927 0.926
## 3 sexGenderfluid Characters 263. 69.2 3.80 0.000146
## 4 sexGenderless Characters -6.82 26.4 -0.258 0.796
## 5 sexMale Characters -0.640 14.8 -0.0433 0.965
## 6 sexTransgender Characters -15.7 96.7 -0.162 0.871
## # A tibble: 7 x 3
## sex n mean_sex
## <chr> <int> <dbl>
## 1 Agender Characters 45 19.7
## 2 Female Characters 5804 21.0
## 3 Genderfluid Characters 2 282.
## 4 Genderless Characters 20 12.8
## 5 Male Characters 16421 19.0
## 6 Transgender Characters 1 4
## 7 <NA> 979 5.13
# Shorten the sex labels by dropping the " Characters" suffix, then reorder
# the levels so that "Female" comes first and therefore becomes the
# reference level when `sex` is used as a predictor.
comic_characters <- comic_characters %>%
  mutate(
    sex = sex %>%
      fct_recode(
        "Agender" = "Agender Characters",
        "Female" = "Female Characters",
        "Genderfluid" = "Genderfluid Characters",
        "Genderless" = "Genderless Characters",
        "Male" = "Male Characters",
        "Transgender" = "Transgender Characters"
      ) %>%
      fct_relevel(
        "Female",
        "Male",
        "Transgender",
        "Agender",
        "Genderfluid",
        "Genderless"
      )
  )
# Refit the appearances-by-sex regression on the current `sex` column and
# print the coefficient table.
mod <- lm(appearances ~ sex, data = comic_characters)
tidy(mod)
## # A tibble: 6 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 21.0 1.29 16.3 2.65e-59
## 2 sexMale -2.01 1.50 -1.34 1.81e- 1
## 3 sexTransgender -17.0 95.6 -0.178 8.59e- 1
## 4 sexAgender -1.37 14.8 -0.0927 9.26e- 1
## 5 sexGenderfluid 261. 67.6 3.87 1.10e- 4
## 6 sexGenderless -8.20 22.0 -0.373 7.09e- 1
# Plot every character's appearance count by sex, then overlay each
# group's mean as a larger red point.
ggplot(comic_characters, aes(x = sex, y = appearances)) +
  geom_point() +
  # `fun` replaces the `fun.y` argument, deprecated in ggplot2 3.3.0.
  geom_point(stat = "summary", fun = "mean", color = "red", size = 3) +
  # Rotate the x-axis labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))