Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created December 5, 2017 07:56
Show Gist options
  • Save thoughtfulbloke/1b2eeb054b5fa1e1e16b840e6d01f7a6 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/1b2eeb054b5fa1e1e16b840e6d01f7a6 to your computer and use it in GitHub Desktop.
library(OECD)
library(feather)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(viridis)
dataFolder= "traffic_data"
dataset <- "FAMILY"
familyfile <- paste0(dataFolder,"/OECD_",dataset, ".feather")
if(!dir.exists(dataFolder)){
dir.create(dataFolder)
}
if(!file.exists(familyfile)){
dstruc <- get_data_structure(dataset)
df <- get_dataset(dataset)
df2 <- merge(df, dstruc$UNIT, by.x="UNIT", by.y="id")
names(df2)[9] <- "UNIT_label"
df3 <- merge(df2, dstruc$IND, by.x="IND", by.y="id")
names(df3)[10] <- "IND_label"
write_feather(df3, path=familyfile)
}
rm(list = setdiff(ls(), c('familyfile')))
FAM <- read_feather(familyfile)
# check indicator names
FAM %>% select(IND, IND_label) %>% distinct() %>% View()
country_model <- function(df) {
lm(dependent ~ independent, data = df)
}
get_slope <- function(mdL) {
mdL$coefficients[2]
}
FAM %>% filter(IND == "FAM16A" | IND == "FAM4B") %>%
mutate(obsTime = as.integer(obsTime)) %>%
select(COU, obsTime, IND, obsValue) %>%
arrange(COU, obsTime) %>% spread(IND, obsValue) %>%
filter(!is.na(FAM16A), !is.na(FAM4B)) %>% glimpse() %>%
arrange(COU, obsTime) %>% group_by(COU) %>%
mutate(independent = FAM4B,
dependent = FAM16A) %>%
nest() %>%
mutate(model = map(data, country_model),
slope = map(model, get_slope),
glance = map(model, broom::glance)) %>%
unnest(glance, slope) %>% mutate(significance = (1-p.value)*slope) %>%
mutate(state = if_else(COU=="NZL", "New Zealand", "Everyone Else")) %>%
glimpse() %>%
ggplot(aes(x=slope, y=r.squared, colour=state)) + geom_point() +
ylim(0,1) + xlab("model slope (further from 0 is more dramatic relationship)") +
ylab("r.squared (higher is data fits model better)") +
ggtitle("1604 obs from 39 countries (data OECD:FAMILY dataset)\nAs divorce rates increase infant mortality decreases")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment