Skip to content

Instantly share code, notes, and snippets.

@allatambov
Created February 18, 2021 14:35
Show Gist options
  • Save allatambov/09e73d7d21d5622dc095fa81c5c7109e to your computer and use it in GitHub Desktop.
Save allatambov/09e73d7d21d5622dc095fa81c5c7109e to your computer and use it in GitHub Desktop.
######### DATA LOADING #########
# Read the history data set straight from GitHub; character columns are
# converted to factors on import.
data_url <- "https://raw.githubusercontent.com/allatambov/allatambov.github.io/master/rprog21/history.csv"
popular <- read.csv(file = data_url, stringsAsFactors = TRUE)

# Column-by-column overview of what was loaded (auto-printed at top level).
summary(popular)

# Total number of rows; the proportion sections below divide by this.
n <- dim(popular)[1]
######### DATA FILTERING #########
# Split the data into the two domains that are compared below.
# `%in%` is used rather than `==`: for a row whose `domain` is NA, `==`
# returns NA, and indexing with NA inserts an all-NA row, which would
# inflate the counts and turn later means into NA. `%in%` never returns
# NA, so rows with a missing domain are simply left out.
pop_hum <- popular[popular$domain %in% "Humanities", ]
n_hum <- nrow(pop_hum)  # number of rows in the Humanities domain

pop_sci <- popular[popular$domain %in% "Science & Technology", ]
n_sci <- nrow(pop_sci)  # number of rows in the Science & Technology domain
######### CI FOR PROPORTION #########
# BinomCI() comes from DescTools; install the package once if needed:
# install.packages("DescTools")
library(DescTools)

# 95% confidence interval for the share of Humanities rows among all rows.
BinomCI(n_hum, n, conf.level = 0.95)

# An interval for the same proportion by hand (normal approximation).
# NOTE(review): BinomCI() is called without `method`, so its result need not
# coincide with this formula if the package default differs -- confirm.
z_hum <- qnorm(0.975)                     # two-sided 95% normal quantile
p_hum <- n_hum / n                        # sample proportion
se_hum <- sqrt(p_hum * (1 - p_hum) / n)   # standard error of the proportion
moe_hum <- z_hum * se_hum                 # half-width of the interval
p_hum - moe_hum
p_hum + moe_hum
######### COMPARING CIs FOR PROPORTIONS #########
# 95% confidence intervals for the share of each domain among all rows.
ci_p1 <- BinomCI(n_hum, n, conf.level = 0.95)
ci_p2 <- BinomCI(n_sci, n, conf.level = 0.95)
ci_p1
ci_p2

# One row per domain. Positions 1, 2 and 3 of each result are used as the
# point estimate, the lower bound and the upper bound respectively.
x <- 1:2
y <- c(ci_p1[1], ci_p2[1])
lower_ci <- c(ci_p1[2], ci_p2[2])
upper_ci <- c(ci_p1[3], ci_p2[3])
dat_ci <- cbind.data.frame(x, y, lower_ci, upper_ci)

library(ggplot2)
# Point estimate with its interval for each domain. `ymin` takes the lower
# bound and `ymax` the upper bound. The tick labels name the domain drawn at
# each x position (1 = Humanities, red; 2 = Science & Technology, blue).
ggplot(dat_ci, aes(x = x, y = y)) +
  geom_errorbar(
    aes(ymin = lower_ci, ymax = upper_ci),
    color = c("red", "blue")
  ) +
  geom_point(size = 4, color = c("red", "blue")) +
  scale_x_continuous(
    breaks = x,
    labels = c("Humanities", "Science & Technology")
  ) +
  labs(x = "95% confidence intervals", y = "proportions")
######### CI FOR MEAN #########
# Popularity index values for the Humanities subset.
hpi_hum <- pop_hum$historical_popularity_index

# Confidence interval for the mean via DescTools, with its default settings.
MeanCI(hpi_hum)

# An interval for the same mean by hand, using the t distribution.
t_m <- qt(p = 0.975, df = n_hum - 1)   # two-sided 95% quantile, n_hum - 1 d.f.
se_m <- sd(hpi_hum) / sqrt(n_hum)      # standard error of the mean
m <- mean(hpi_hum)                     # sample mean
m - t_m * se_m
m + t_m * se_m
######### COMPARING CIs FOR MEANS #########
# Confidence intervals for the mean popularity index in each domain.
ci_m1 <- MeanCI(pop_hum$historical_popularity_index)
ci_m2 <- MeanCI(pop_sci$historical_popularity_index)
ci_m1
ci_m2

# One row per domain. Positions 1, 2 and 3 of each result are used as the
# point estimate, the lower bound and the upper bound respectively.
x2 <- 1:2
y2 <- c(ci_m1[1], ci_m2[1])
lower_ci2 <- c(ci_m1[2], ci_m2[2])
upper_ci2 <- c(ci_m1[3], ci_m2[3])
dat_ci2 <- cbind.data.frame(x2, y2, lower_ci2, upper_ci2)

# Mean with its interval for each domain. `ymin` takes the lower bound and
# `ymax` the upper bound. The y axis is labelled as a mean because this plot
# shows means, not proportions. The tick labels name the domain drawn at each
# x position (1 = Humanities, red; 2 = Science & Technology, blue).
ggplot(dat_ci2, aes(x = x2, y = y2)) +
  geom_errorbar(
    aes(ymin = lower_ci2, ymax = upper_ci2),
    color = c("red", "blue")
  ) +
  geom_point(size = 4, color = c("red", "blue")) +
  scale_x_continuous(
    breaks = x2,
    labels = c("Humanities", "Science & Technology")
  ) +
  labs(x = "95% confidence intervals", y = "mean historical popularity index")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment