Skip to content

Instantly share code, notes, and snippets.

@daob
Created August 31, 2017 14:48
Show Gist options
  • Save daob/448c7872297160168d082bc232b8adcb to your computer and use it in GitHub Desktop.
Save daob/448c7872297160168d082bc232b8adcb to your computer and use it in GitHub Desktop.
library(lubridate)
library(rjson)
library(tidyverse)
library(ISOcodes)
library(ggplot2)
library(ggthemes)
# Load ESS round 7 and recode the human-values items and the home language.
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept because the relative paths used below to read the data (and later to
# save the figure) rely on it.
setwd("~/Dropbox/ERC/templates/")
ess7 <- haven::read_dta("ESS7e02_1.dta")

# ISO 639-2 language table shipped with the ISOcodes package.
data("ISO_639_2")
rownames(ISO_639_2) <- ISO_639_2$Alpha_3_B

ess7_values <- ess7 %>%
  dplyr::select(idno, cntry, dweight, lnghom1, ipcrtiv:impfun) %>%
  # Every column after the first four: values above 6 are set to NA
  # (presumably the survey's refusal / don't-know codes -- confirm in codebook).
  dplyr::mutate_at(-(1:4), ~ ifelse(.x > 6, NA, .x)) %>%
  dplyr::mutate(
    # "777", "888" and "999" are treated as missing home language.
    language_home = ifelse(lnghom1 %in% c("777", "888", "999"), NA, lnghom1),
    # Swiss German = German
    language_home = ifelse(language_home == "GSW", "GER", language_home)
  )

# Translate the three-letter code into the two-letter code and language name.
# match() compares codes exactly; indexing the data frame by character row
# names would partial-match them, and needed the same lookup done twice.
iso_row <- match(tolower(ess7_values$language_home), ISO_639_2$Alpha_3_B)
ess7_values$language_home_iso2 <- ISO_639_2$Alpha_2[iso_row]
ess7_values$language_home_name <- ISO_639_2$Name[iso_row]
# Filter out small-time languages: keep only home languages that more than
# 200 respondents report.
ess7_languages <- ess7_values %>%
  dplyr::count(language_home_iso2) %>%
  dplyr::filter(n > 200)

ess7_values <- dplyr::filter(
  ess7_values,
  language_home_iso2 %in% ess7_languages$language_home_iso2
)

# Show languages and counts in data (NA is tabulated too).
table(ess7_values$language_home_iso2, exclude = NULL)
# table(ess7_values$language_home_iso2, exclude = c()) %>% sort %>% barplot
summary(ess7_values)

# Per-language mean of every numeric column, ignoring missing values.
ess7_values_means <- dplyr::summarise_if(
  dplyr::group_by(ess7_values, language_home_iso2),
  is.numeric,
  mean,
  na.rm = TRUE
)
# Download daily aggregate pageview counts for one language edition of
# Wikipedia from the Wikimedia REST API.
#
# Arguments:
#   lang_iso2  Single language code used as the Wikipedia subdomain.
#   start,end  First and last day requested, as "YYYYMMDD" strings. The
#              defaults are the range that used to be hard-coded in the URL.
# Returns the `items` of the JSON response bound into one data frame, with an
# added `date` column parsed from `timestamp`.
# Errors if the code is missing or not of length one, or if the download or
# parsing fails.
get_counts_perproject <- function(lang_iso2,
                                  start = "20150701",
                                  end = "20160701") {
  # sprintf() formats NA as the text "NA", which would silently build a
  # valid-looking URL for an unintended subdomain; reject it up front.
  stopifnot(length(lang_iso2) == 1L, !is.na(lang_iso2))

  url <- sprintf(
    "https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/%s.wikipedia.org/all-access/all-agents/daily/%s/%s",
    lang_iso2, start, end
  )

  rjson::fromJSON(file = url)$items %>%
    dplyr::bind_rows() %>%
    # Only the leading year-month-day part of `timestamp` is consumed by the
    # format; anything after it is ignored.
    dplyr::mutate(date = lubridate::as_date(timestamp, format = "%Y%m%d"))
}
# Articles whose pageviews are compared. The order matters: further down the
# first entry's views become `views.x` ("high") and the second entry's become
# `views.y` ("low").
page_names <- c("William_Shakespeare", "Game_of_Thrones")

# Download daily pageview counts of one article in one language edition of
# Wikipedia from the Wikimedia REST API.
#
# Arguments:
#   lang_iso2  Single language code used as the Wikipedia subdomain.
#   page_name  Single article title (underscores instead of spaces).
#   start,end  First and last day requested, as "YYYYMMDD" strings. The
#              defaults are the range that used to be hard-coded in the URL.
# Returns the `items` of the JSON response bound into one data frame, with an
# added `date` column parsed from `timestamp`.
# Errors if an argument is missing or not of length one, or if the download
# or parsing fails.
get_counts_perpage <- function(lang_iso2, page_name,
                               start = "20150701",
                               end = "20160701") {
  # sprintf() formats NA as the text "NA", which would silently build a
  # valid-looking URL; reject missing or vector input up front.
  stopifnot(
    length(lang_iso2) == 1L, !is.na(lang_iso2),
    length(page_name) == 1L, !is.na(page_name)
  )

  # Percent-encode the title so characters such as "/" or "?" cannot change
  # the URL path. Titles that already contain %XX escapes are left as they
  # are (URLencode's default `repeated = FALSE`).
  article <- utils::URLencode(enc2utf8(page_name), reserved = TRUE)

  url <- sprintf(
    "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/%s.wikipedia/all-access/all-agents/%s/daily/%s/%s",
    lang_iso2, article, start, end
  )

  rjson::fromJSON(file = url)$items %>%
    dplyr::bind_rows() %>%
    # Only the leading year-month-day part of `timestamp` is consumed by the
    # format; anything after it is ignored.
    dplyr::mutate(date = lubridate::as_date(timestamp, format = "%Y%m%d"))
}
# Run `fetch(lang_iso2)` and return its result, or NULL when the language code
# is missing or the download fails. Failures are reported with message() so
# they are visible without stopping the script.
fetch_or_null <- function(lang_iso2, fetch) {
  # A missing code would be pasted into the URL as the text "NA".
  if (is.na(lang_iso2)) {
    return(NULL)
  }
  tryCatch(
    fetch(lang_iso2),
    error = function(e) {
      message(
        "Pageview download failed for language '", lang_iso2, "': ",
        conditionMessage(e)
      )
      NULL
    }
  )
}

# One entry per language, in the order of ess7_values_means: a list with one
# data frame per element of `page_names`, or NULL if any of them failed.
pageviews_raw <- lapply(
  ess7_values_means$language_home_iso2,
  fetch_or_null,
  fetch = function(lang_iso2) {
    lapply(page_names, function(page_name) {
      get_counts_perpage(lang_iso2, page_name)
    })
  }
)

# One entry per language, in the same order: the project-wide pageviews, or
# NULL if the download failed.
pageviews_project <- lapply(
  ess7_values_means$language_home_iso2,
  fetch_or_null,
  fetch = get_counts_perproject
)
# Combine, per language, the two articles' daily views with the project-wide
# daily views, then stack all languages into one data frame.
d <- vector("list", length(pageviews_raw))
for (iproject in seq_along(pageviews_raw)) {
  page_views <- pageviews_raw[[iproject]]
  project_views <- pageviews_project[[iproject]]

  # Skip a language when either download failed; joining against NULL would
  # otherwise abort the whole script.
  if (is.null(page_views) || is.null(project_views)) {
    next
  }

  # Joining the two article tables on date suffixes their shared columns with
  # .x (first article) and .y (second article); the project table's columns
  # then come in unsuffixed.
  d[[iproject]] <-
    dplyr::inner_join(page_views[[1]], page_views[[2]], by = "date") %>%
    dplyr::inner_join(project_views, by = "date")
}
# bind_rows() drops the NULL slots left by skipped languages.
d <- dplyr::bind_rows(d)
# Daily views per language: split `project` into its language part, then keep
# the language, the date and the three view counts (missing article views
# count as zero).
wiki_views <- d %>%
  tidyr::separate(project, into = c("language", "project")) %>%
  dplyr::transmute(
    language,
    date,
    views_raw_high = ifelse(is.na(views.x), 0, views.x),
    views_raw_low = ifelse(is.na(views.y), 0, views.y),
    views_project = views
  )

# Typical daily interest per language; note that the "_avg" columns hold
# medians.
mean_interest <- dplyr::summarise(
  dplyr::group_by(wiki_views, language),
  views_raw_high_avg = median(views_raw_high),
  views_raw_low_avg = median(views_raw_low),
  views_project_avg = median(views_project)
)

# Give the survey means the same key column name as the Wikipedia data and
# keep only languages present in both.
ess7_values_means[["language"]] <- ess7_values_means[["language_home_iso2"]]
interest_joined <- mean_interest %>%
  dplyr::inner_join(ess7_values_means, by = "language")
# Scatter plot of median daily views of the two articles per language (log
# scales), coloured by quantile bin of the language's mean `ipcrtiv` score,
# with one linear fit per bin.
low_vs_high_plot <- interest_joined %>%
  mutate(
    # cut() intervals are left-open by default, so without
    # include.lowest = TRUE the language with the minimum score falls outside
    # every bin and turns into NA.
    `Important to be creative` = cut(
      ipcrtiv,
      quantile(ipcrtiv, probs = c(0, 0.33, 0.5, 0.67, 1), na.rm = TRUE),
      include.lowest = TRUE
    )
  ) %>%
  ggplot(aes(views_raw_low_avg, views_raw_high_avg, label = language,
             colour = `Important to be creative`,
             group = `Important to be creative`)) +
  ggplot2::scale_x_log10() + ggplot2::scale_y_log10() +
  ggplot2::xlab("Wikipedia pageviews: 'Game of Thrones'") +
  ggplot2::ylab("Wikipedia pageviews: 'William Shakespeare'") +
  geom_smooth(method = "lm", se = FALSE) +
  geom_text() +
  scale_colour_brewer(palette = "RdYlBu") +
  ggplot2::ggtitle("Interest in Low versus high culture on Wikipedia\nis related to Schwarz human values") +
  theme_minimal(base_family = "Helvetica") +
  theme(panel.border = element_rect(colour = "#00000055", fill=NA, size=0.25))

# Display the plot, as the bare top-level expression did before.
low_vs_high_plot

base_width <- 6.5
# NOTE(review): 1.7 only approximates the golden ratio (about 1.618); the
# value is kept so the saved figure keeps its dimensions.
golden_ratio <- 1.7
# Pass the plot explicitly instead of relying on last_plot().
ggsave("low_vs_high.pdf", plot = low_vs_high_plot,
       width = base_width, height = base_width/golden_ratio)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment