Skip to content

Instantly share code, notes, and snippets.

@carlislerainey
Last active April 3, 2017 13:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlislerainey/fa2bbe336ed8dd97ff8fc49173507a8e to your computer and use it in GitHub Desktop.
Save carlislerainey/fa2bbe336ed8dd97ff8fc49173507a8e to your computer and use it in GitHub Desktop.
# note: for this code to work, you need to have devtools, dplyr, and
# ggplot2 installed, as well as the data sets in your data folder.
# set working directory **CHANGE THIS**
# every relative path used in this script ("data/...") is resolved against
# this directory, so it must contain the data folder
setwd("~/Dropbox/classes/pols-209")
# load packages
library(dplyr)
# my info **CHANGE THIS**
# my_name is written into the `modeler` column of the predictions file
my_name <- "Carlisle R."
# path (relative to the working directory) of the CSV written at the end
where_to_save_predictions <- "data/wa2-predictions.csv"
# load evaluate_models() function from GitHub
# source_gist() downloads and runs the named file from the gist; `$value` is
# the value of the last expression in that file, which is assumed to be the
# evaluate_models() function -- TODO confirm against the gist
gist_id <- "50416e19e96617a9591953823eef3ec2"
# expected SHA-1 of the downloaded file: with `sha1` supplied, source_gist()
# stops instead of running code that differs from the version this hash
# describes.
# NOTE(review): presumably this is the file hash that source_gist() reports
# when called without `sha1`; if the gist is revised on purpose, update it.
sha <- "50e75594c59c0661c426d1d53ea4fca115d93a62"
evaluate_models <- devtools::source_gist(id = gist_id,
                                         filename = "evaluate-models.R",
                                         sha1 = sha)$value
# load training and prediction data
# both paths are relative to the working directory; train_df is the data the
# models are fit on, pred_df is the data predictions are generated for
train_df <- readRDS("data/taxes-training.rds")
pred_df <- readRDS("data/taxes-prediction.rds")
# model formulas **CHANGE THESE**
# each formula has tax_change as the outcome and lag_tax_change plus one
# additional predictor on the right-hand side
f1 <- tax_change ~ lag_tax_change + personal_income # model 1
f2 <- tax_change ~ lag_tax_change + population # model 2
f3 <- tax_change ~ lag_tax_change + gov_request # model 3
# model names **CHANGE THESE, BUT KEEP THEM SHORT (~ 25 chars.)**
# the i-th name labels the i-th model in the predictions file, so keep the
# order aligned with f1, f2, f3
model_names <- c("Model 1",
"Model 2",
"Model 3")
# fit models
# each formula is fit as a linear model (lm) on the training data
m1 <- lm(f1, data = train_df)
m2 <- lm(f2, data = train_df)
m3 <- lm(f3, data = train_df)
# evaluate models
# evaluate_models() is the function sourced from the gist; its definition is
# not in this file. NOTE(review): presumably `group = "year"` makes it assess
# the models year by year -- confirm against the gist.
evaluate_models(m1, m2, m3, data = train_df,
group = "year", model_names = model_names)
# code to create the data-frame to submit
# note: i recommend not changing this block
fits <- list(m1, m2, m3)
# every fit needs a label; without this check a missing name would silently
# become NA in the submitted file
stopifnot(length(model_names) >= length(fits))
# build one data frame of predictions per model and stack them in a single
# step, rather than growing submit_df with rbind() inside a loop
submit_df <- bind_rows(lapply(seq_along(fits), function(i) {
  df0 <- select(pred_df, state, year)
  df0$modeler <- my_name
  df0$model_name <- model_names[i]
  # predict() returns a named vector; the names are not needed in a column
  df0$prediction <- unname(predict(fits[[i]], newdata = pred_df))
  # put the columns in the order used for the submission file
  select(df0, modeler, model_name, state, year, prediction)
}))
# average the models' predictions for each state-year and label the result
# as one more "model" from the same modeler
average_df <- submit_df %>%
  group_by(state, year) %>%
  summarize(prediction = mean(prediction),
            modeler = my_name) %>%
  # summarize() only removes the last grouping variable, so drop the
  # leftover grouping by state explicitly
  ungroup()
average_df$model_name <- "Average"
# stack the per-model rows and the averaged rows; bind_rows() matches
# columns by name, so the differing column order does not matter
combined_df <- bind_rows(submit_df, average_df)
# write the file to submit, without a row-name column
write.csv(combined_df, where_to_save_predictions, row.names = FALSE)
# plot of predictions
# one panel per year; each point is one model's prediction for a state,
# colored by model (including the "Average" rows)
library(ggplot2)
ggplot(data = combined_df,
       mapping = aes(x = prediction, y = state, color = model_name)) +
  facet_wrap(~ year) +
  geom_point()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment