Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Last active November 27, 2017 05:40
Show Gist options
  • Save thoughtfulbloke/7076a6753f8778c1314570e8fe5d6673 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/7076a6753f8778c1314570e8fe5d6673 to your computer and use it in GitHub Desktop.
library(OECD)
library(feather)
library(dplyr)
library(countrycode)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(viridis)
# create folder for storing data if it does not already exist locally
dataFolder= "traffic_data"
if(!dir.exists(dataFolder)){
dir.create(dataFolder)
}
# get road accident data if it does not already exist locally
dataset <- "ITF_ROAD_ACCIDENTS"
accidentfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
if(!file.exists(accidentfile)){
dstruc <- get_data_structure(dataset)
df <- get_dataset(dataset)
df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id")
names(df2)[9] <- "UNIT_label"
df3 <- merge(df2, dstruc$VARIABLE, by.x="VARIABLE", by.y="id")
names(df3)[10] <- "VARIABLE_label"
write_feather(df3, path=accidentfile)
}
# get average work hours data if it does not exist locally
dataset <- "ANHRS"
workfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
if(!file.exists(workfile)){
dstruc <- get_data_structure(dataset)
df <- get_dataset(dataset)
df2 <- merge(df, dstruc$EMPSTAT, by.x="EMPSTAT", by.y="id")
names(df2)[7] <- "EMPSTAT_label"
write_feather(df, path=workfile)
}
rm(list = setdiff(ls(), c('accidentfile','workfile')))
# read in accident data
ITF_ROAD_ACCIDENTS <- read_feather(accidentfile) %>%
filter(VARIABLE == "T-ACCI-KIL") %>%
mutate(obsTime = as.integer(obsTime)) %>%
select(COUNTRY, obsTime, toll = obsValue)
# read in work hours
OECD <- read_feather(workfile)
# combine fatalities from the accident data with the population data
combo <- OECD %>%
filter(EMPSTAT == "TE") %>%
mutate(obsTime = as.integer(obsTime)) %>%
select(COUNTRY, obsTime, work = obsValue) %>%
inner_join(ITF_ROAD_ACCIDENTS)
# make a lot of linear models (one for each country) and get their summaries
country_model <- function(df) {
lm(dependent ~ independent, data = df)
}
get_slope <- function(mdL) {
if (mdL$coefficients[2] < 0){
return(-1)
} else{
return(1)
}
}
comparisons <- combo %>%
arrange(COUNTRY, obsTime) %>% group_by(COUNTRY) %>%
mutate(independent = work,
dependent = toll) %>%
nest() %>%
mutate(model = map(data, country_model),
slope = map(model, get_slope),
glance = map(model, broom::glance)) %>%
unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope)
comparisons %>%
mutate(state = if_else(COUNTRY=="NZL", "New Zealand", "Everyone Else")) %>%
ggplot(aes(x=slope, y=r.squared, colour=state, shape=state)) + geom_point() +
ggtitle("Road Toll ~ Work hours, 1177 obs, 36 countries (data ITF/OECD)") +
ylim(0,1) + xlim(-1, 1) +
scale_color_viridis(discrete=TRUE, end=0.9) + theme_bw() +
xlab("Significance of model") + ylab("Fit of Data (r squared)") +
geom_vline(xintercept=0.95, colour="red") +
geom_vline(xintercept=-0.95, colour="red") +
annotate(geom = "text", x = 0.90, y = 0.5,
label = "0.95 Significance, Postive slope",
color = "red", angle = -90) +
annotate(geom = "text", x = -0.90, y = 0.5,
label = "0.95 Significance, Negative slope",
color = "red", angle = 90)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment