thoughtfulbloke/tollAndWork.R

## tollAndWork.R
library(OECD)
library(feather)
library(dplyr)
library(countrycode)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(viridis)

# create folder for storing data if it does not already exist locally

dataFolder= "traffic_data"
if(!dir.exists(dataFolder)){
  dir.create(dataFolder)
}

# get road accident data if it does not already exist locally
dataset <- "ITF_ROAD_ACCIDENTS"
accidentfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
if(!file.exists(accidentfile)){
  dstruc <- get_data_structure(dataset)
  df <- get_dataset(dataset)
  df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id")
  names(df2)[9] <- "UNIT_label"
  df3 <- merge(df2, dstruc$VARIABLE, by.x="VARIABLE", by.y="id")
  names(df3)[10] <- "VARIABLE_label"
  write_feather(df3, path=accidentfile)
}

# get average work hours data if it does not exist locally

dataset <- "ANHRS"
workfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
if(!file.exists(workfile)){
  dstruc <- get_data_structure(dataset)
  df <- get_dataset(dataset)
  df2 <- merge(df, dstruc$EMPSTAT, by.x="EMPSTAT", by.y="id")
  names(df2)[7] <- "EMPSTAT_label"
  write_feather(df, path=workfile)
}

rm(list = setdiff(ls(), c('accidentfile','workfile')))

# read in accident data
ITF_ROAD_ACCIDENTS <- read_feather(accidentfile) %>%
  filter(VARIABLE == "T-ACCI-KIL") %>%
  mutate(obsTime = as.integer(obsTime)) %>%
  select(COUNTRY, obsTime, toll = obsValue)

# read in work hours

OECD <- read_feather(workfile)

# combine fatalities from the accident data with the population data

combo <- OECD %>%
  filter(EMPSTAT == "TE") %>%
  mutate(obsTime = as.integer(obsTime)) %>%
  select(COUNTRY, obsTime, work = obsValue) %>%
  inner_join(ITF_ROAD_ACCIDENTS)

# make a lot of linear models (one for each country) and get their summaries
country_model <- function(df) {
  lm(dependent ~ independent, data = df)
}

get_slope <- function(mdL) {
  if (mdL$coefficients[2] < 0){
    return(-1)
  } else{
    return(1)
  }
}

comparisons <- combo %>%
  arrange(COUNTRY, obsTime) %>% group_by(COUNTRY) %>%
  mutate(independent = work,
         dependent = toll) %>%
  nest() %>%
  mutate(model = map(data, country_model),
         slope = map(model, get_slope),
         glance = map(model, broom::glance)) %>%
  unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope)

comparisons %>%
  mutate(state = if_else(COUNTRY=="NZL", "New Zealand", "Everyone Else")) %>%
ggplot(aes(x=slope, y=r.squared, colour=state, shape=state)) + geom_point() +
  ggtitle("Road Toll ~ Work hours, 1177 obs, 36 countries (data ITF/OECD)") +
  ylim(0,1) + xlim(-1, 1) +
  scale_color_viridis(discrete=TRUE, end=0.9) + theme_bw() +
  xlab("Significance of model") + ylab("Fit of Data (r squared)") +
  geom_vline(xintercept=0.95, colour="red") +
  geom_vline(xintercept=-0.95, colour="red") +
  annotate(geom = "text", x = 0.90, y = 0.5,
           label = "0.95 Significance, Postive slope",
           color = "red", angle = -90) +
  annotate(geom = "text", x = -0.90, y = 0.5,
           label = "0.95 Significance, Negative slope",
           color = "red", angle = 90)
	library(OECD)
	library(feather)
	library(dplyr)
	library(countrycode)
	library(tidyr)
	library(purrr)
	library(broom)
	library(ggplot2)
	library(viridis)

	# create folder for storing data if it does not already exist locally

	dataFolder= "traffic_data"
	if(!dir.exists(dataFolder)){
	dir.create(dataFolder)
	}

	# get road accident data if it does not already exist locally
	dataset <- "ITF_ROAD_ACCIDENTS"
	accidentfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
	if(!file.exists(accidentfile)){
	dstruc <- get_data_structure(dataset)
	df <- get_dataset(dataset)
	df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id")
	names(df2)[9] <- "UNIT_label"
	df3 <- merge(df2, dstruc$VARIABLE, by.x="VARIABLE", by.y="id")
	names(df3)[10] <- "VARIABLE_label"
	write_feather(df3, path=accidentfile)
	}

	# get average work hours data if it does not exist locally

	dataset <- "ANHRS"
	workfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
	if(!file.exists(workfile)){
	dstruc <- get_data_structure(dataset)
	df <- get_dataset(dataset)
	df2 <- merge(df, dstruc$EMPSTAT, by.x="EMPSTAT", by.y="id")
	names(df2)[7] <- "EMPSTAT_label"
	write_feather(df, path=workfile)
	}

	rm(list = setdiff(ls(), c('accidentfile','workfile')))

	# read in accident data
	ITF_ROAD_ACCIDENTS <- read_feather(accidentfile) %>%
	filter(VARIABLE == "T-ACCI-KIL") %>%
	mutate(obsTime = as.integer(obsTime)) %>%
	select(COUNTRY, obsTime, toll = obsValue)

	# read in work hours

	OECD <- read_feather(workfile)

	# combine fatalities from the accident data with the population data

	combo <- OECD %>%
	filter(EMPSTAT == "TE") %>%
	mutate(obsTime = as.integer(obsTime)) %>%
	select(COUNTRY, obsTime, work = obsValue) %>%
	inner_join(ITF_ROAD_ACCIDENTS)

	# make a lot of linear models (one for each country) and get their summaries
	country_model <- function(df) {
	lm(dependent ~ independent, data = df)
	}

	get_slope <- function(mdL) {
	if (mdL$coefficients[2] < 0){
	return(-1)
	} else{
	return(1)
	}
	}

	comparisons <- combo %>%
	arrange(COUNTRY, obsTime) %>% group_by(COUNTRY) %>%
	mutate(independent = work,
	dependent = toll) %>%
	nest() %>%
	mutate(model = map(data, country_model),
	slope = map(model, get_slope),
	glance = map(model, broom::glance)) %>%
	unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope)

	comparisons %>%
	mutate(state = if_else(COUNTRY=="NZL", "New Zealand", "Everyone Else")) %>%
	ggplot(aes(x=slope, y=r.squared, colour=state, shape=state)) + geom_point() +
	ggtitle("Road Toll ~ Work hours, 1177 obs, 36 countries (data ITF/OECD)") +
	ylim(0,1) + xlim(-1, 1) +
	scale_color_viridis(discrete=TRUE, end=0.9) + theme_bw() +
	xlab("Significance of model") + ylab("Fit of Data (r squared)") +
	geom_vline(xintercept=0.95, colour="red") +
	geom_vline(xintercept=-0.95, colour="red") +
	annotate(geom = "text", x = 0.90, y = 0.5,
	label = "0.95 Significance, Postive slope",
	color = "red", angle = -90) +
	annotate(geom = "text", x = -0.90, y = 0.5,
	label = "0.95 Significance, Negative slope",
	color = "red", angle = 90)