Last active
April 3, 2017 13:17
-
-
Save carlislerainey/fa2bbe336ed8dd97ff8fc49173507a8e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup ----
# note: for this code to work, you need to have devtools, dplyr, and
# ggplot2 installed, as well as the data sets in your data folder.

# set working directory **CHANGE THIS**
# (the relative "data/..." paths used below are resolved against it)
setwd("~/Dropbox/classes/pols-209")

# load packages
library(dplyr)

# my info **CHANGE THIS**
my_name <- "Carlisle R."
where_to_save_predictions <- "data/wa2-predictions.csv"

# load evaluate_models() function from GitHub
gist_id <- "50416e19e96617a9591953823eef3ec2"
# NOTE(review): `sha` is assigned but never passed to source_gist(), so the
# download below is not pinned or verified against it. Presumably it was
# meant to identify a specific version of the gist -- confirm which hash
# this is before wiring it in (e.g. via the `sha1` argument).
sha <- "50e75594c59c0661c426d1d53ea4fca115d93a62"
evaluate_models <- devtools::source_gist(id = gist_id,
                                         filename = "evaluate-models.R")$value

# load training and prediction data
train_df <- readRDS("data/taxes-training.rds")
pred_df <- readRDS("data/taxes-prediction.rds")
# Models ----
# model formulas **CHANGE THESE**
f1 <- tax_change ~ lag_tax_change + personal_income  # model 1
f2 <- tax_change ~ lag_tax_change + population       # model 2
f3 <- tax_change ~ lag_tax_change + gov_request      # model 3

# model names **CHANGE THESE, BUT KEEP THEM SHORT (~ 25 chars.)**
# (one name per model, in the same order as the formulas above)
model_names <- c("Model 1",
                 "Model 2",
                 "Model 3")

# fit each formula by least squares on the training data
m1 <- lm(f1, data = train_df)
m2 <- lm(f2, data = train_df)
m3 <- lm(f3, data = train_df)

# evaluate models with the helper sourced from the gist, passing the
# training data, "year" as the group, and the model names
evaluate_models(m1, m2, m3, data = train_df,
                group = "year", model_names = model_names)
# Submission data ----
# code to create the data-frame to submit
# note: i recommend not changing this block
fits <- list(m1, m2, m3)

# every fitted model needs exactly one label; otherwise model_names[i]
# would silently yield NA for the unmatched models
stopifnot(length(fits) == length(model_names))

# build one data frame of predictions per model, then stack them once.
# seq_along() is safe for an empty list (1:length(x) would give c(1, 0)),
# and a single bind_rows() avoids regrowing a data frame inside a loop.
prediction_dfs <- lapply(seq_along(fits), function(i) {
  df0 <- select(pred_df, state, year)
  df0$modeler <- my_name
  df0$model_name <- model_names[i]
  df0$prediction <- predict(fits[[i]], newdata = pred_df)
  select(df0, modeler, model_name, state, year, prediction)
})
submit_df <- bind_rows(prediction_dfs)

# average the models' predictions within each state-year
average_df <- summarize(group_by(submit_df, state, year),
                        prediction = mean(prediction),
                        modeler = my_name)
# summarize() only peels off the last grouping level; drop the rest so the
# leftover grouping does not leak into later operations
average_df <- ungroup(average_df)
average_df$model_name <- "Average"

# individual model predictions followed by the "Average" rows
combined_df <- bind_rows(submit_df, average_df)
write.csv(combined_df, where_to_save_predictions, row.names = FALSE)
# Plot ----
# plot of predictions: one point per state and model, colored by model,
# with a separate panel for each year
library(ggplot2)
ggplot(combined_df, aes(x = prediction, y = state, color = model_name)) +
  geom_point() +
  facet_wrap(~ year)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment