# carlislerainey/taxes-prediction-submission.R

## taxes-prediction-submission.R

# note: for this code to work, you need to have devtools, dplyr, and
# ggplot2 installed, as well as the data sets in your data folder.

# working directory that holds the course files  **CHANGE THIS**
setwd("~/Dropbox/classes/pols-209")

# attach dplyr for the data-frame verbs used in this script
library(dplyr)

# modeler name and output file for the predictions  **CHANGE THIS**
my_name <- "Carlisle R."
where_to_save_predictions <- "data/wa2-predictions.csv"

# fetch evaluate_models() from its GitHub gist: the file "evaluate-models.R"
# is sourced and the value of that sourced file is kept
gist_id <- "50416e19e96617a9591953823eef3ec2"
# note: `sha` is defined here but is not passed to source_gist() below
sha <- "50e75594c59c0661c426d1d53ea4fca115d93a62"
evaluate_models <- devtools::source_gist(
  id = gist_id,
  filename = "evaluate-models.R"
)[["value"]]

# read the training and prediction data sets from the data folder
train_df <- readRDS(file = "data/taxes-training.rds")
pred_df <- readRDS(file = "data/taxes-prediction.rds")

# model formulas **CHANGE THESE**
# each formula models tax_change as a linear function of lag_tax_change plus
# one additional predictor
f1 <- tax_change ~ lag_tax_change + personal_income  # model 1
f2 <- tax_change ~ lag_tax_change + population  # model 2
f3 <- tax_change ~ lag_tax_change + gov_request  # model 3

# model names  **CHANGE THESE, BUT KEEP THEM SHORT (~ 25 chars.)**
# one name per model, in the same order as f1, f2, f3; the names are passed
# to evaluate_models() and are used to label the predictions further down
model_names <- c("Model 1",
                 "Model 2",
                 "Model 3")

# fit models
# least-squares fit of each formula on the training data
m1 <- lm(f1, data = train_df)
m2 <- lm(f2, data = train_df)
m3 <- lm(f3, data = train_df)

# evaluate models
# NOTE(review): evaluate_models() is sourced from a gist and its body is not
# visible here; presumably it compares the fits using "year" as the grouping
# variable -- verify against the gist
evaluate_models(m1, m2, m3, data = train_df,
                            group = "year", model_names = model_names)

# code to create the data-frame to submit
# note: i recommend not changing this block
#
# builds one data frame of predictions per fitted model, stacks them, appends
# the across-model average for each state-year, and writes the result to
# `where_to_save_predictions`.
fits <- list(m1, m2, m3)

# every fit needs a label; without this check a missing name would silently
# become NA in the submitted file
stopifnot(length(model_names) >= length(fits))

# one data frame per model, collected in a list and stacked once (instead of
# growing a data frame with rbind() inside a loop); seq_along() is safe even
# if `fits` is empty
prediction_dfs <- lapply(seq_along(fits), function(i) {
  df0 <- select(pred_df, state, year)
  df0$modeler <- my_name
  df0$model_name <- model_names[i]
  df0$prediction <- predict(fits[[i]], newdata = pred_df)
  select(df0, modeler, model_name, state, year, prediction)
})
submit_df <- bind_rows(prediction_dfs)

# average the models' predictions within each state-year; ungroup() so the
# result does not stay grouped by state after summarize()
average_df <- ungroup(summarize(group_by(submit_df, state, year),
                                prediction = mean(prediction),
                                modeler = my_name))
average_df$model_name <- "Average"

# individual model predictions followed by the average, written as csv
combined_df <- bind_rows(submit_df, average_df)
write.csv(combined_df, where_to_save_predictions, row.names = FALSE)

# plot of predictions: prediction on the x-axis, state on the y-axis,
# points colored by model, one panel per year
library(ggplot2)
ggplot(data = combined_df,
       mapping = aes(x = prediction, y = state, color = model_name)) +
  geom_point() +
  facet_wrap(facets = ~ year)

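	# NOTE(review): everything from here down repeats the script above with tab
	# indentation -- looks like a paste artifact; confirm whether it is needed.
	# note: for this code to work, you need to have devtools, dplyr, and
	# ggplot2 installed, as well as the data sets in your data folder.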

	# working directory that holds the course files  **CHANGE THIS**
	setwd("~/Dropbox/classes/pols-209")

	# attach dplyr for the data-frame verbs used in this script
	library(dplyr)

	# modeler name and output file for the predictions  **CHANGE THIS**
	my_name <- "Carlisle R."
	where_to_save_predictions <- "data/wa2-predictions.csv"

	# fetch evaluate_models() from its GitHub gist: the file "evaluate-models.R"
	# is sourced and the value of that sourced file is kept
	gist_id <- "50416e19e96617a9591953823eef3ec2"
	# note: `sha` is defined here but is not passed to source_gist() below
	sha <- "50e75594c59c0661c426d1d53ea4fca115d93a62"
	evaluate_models <- devtools::source_gist(
	  id = gist_id,
	  filename = "evaluate-models.R"
	)[["value"]]

	# read the training and prediction data sets from the data folder
	train_df <- readRDS(file = "data/taxes-training.rds")
	pred_df <- readRDS(file = "data/taxes-prediction.rds")

	# model formulas **CHANGE THESE**
	# each formula models tax_change as a linear function of lag_tax_change plus
	# one additional predictor
	f1 <- tax_change ~ lag_tax_change + personal_income # model 1
	f2 <- tax_change ~ lag_tax_change + population # model 2
	f3 <- tax_change ~ lag_tax_change + gov_request # model 3

	# model names  **CHANGE THESE, BUT KEEP THEM SHORT (~ 25 chars.)**
	# one name per model, in the same order as f1, f2, f3; the names are passed
	# to evaluate_models() and are used to label the predictions further down
	model_names <- c("Model 1",
	"Model 2",
	"Model 3")

	# fit models
	# least-squares fit of each formula on the training data
	m1 <- lm(f1, data = train_df)
	m2 <- lm(f2, data = train_df)
	m3 <- lm(f3, data = train_df)

	# evaluate models
	# NOTE(review): evaluate_models() is sourced from a gist and its body is not
	# visible here; presumably it compares the fits using "year" as the grouping
	# variable -- verify against the gist
	evaluate_models(m1, m2, m3, data = train_df,
	group = "year", model_names = model_names)

	# code to create the data-frame to submit
	# note: i recommend not changing this block
	#
	# builds one data frame of predictions per fitted model, stacks them, appends
	# the across-model average for each state-year, and writes the result to
	# `where_to_save_predictions`.
	fits <- list(m1, m2, m3)

	# every fit needs a label; without this check a missing name would silently
	# become NA in the submitted file
	stopifnot(length(model_names) >= length(fits))

	# one data frame per model, collected in a list and stacked once (instead of
	# growing a data frame with rbind() inside a loop); seq_along() is safe even
	# if `fits` is empty
	prediction_dfs <- lapply(seq_along(fits), function(i) {
	  df0 <- select(pred_df, state, year)
	  df0$modeler <- my_name
	  df0$model_name <- model_names[i]
	  df0$prediction <- predict(fits[[i]], newdata = pred_df)
	  select(df0, modeler, model_name, state, year, prediction)
	})
	submit_df <- bind_rows(prediction_dfs)

	# average the models' predictions within each state-year; ungroup() so the
	# result does not stay grouped by state after summarize()
	average_df <- ungroup(summarize(group_by(submit_df, state, year),
	                                prediction = mean(prediction),
	                                modeler = my_name))
	average_df$model_name <- "Average"

	# individual model predictions followed by the average, written as csv
	combined_df <- bind_rows(submit_df, average_df)
	write.csv(combined_df, where_to_save_predictions, row.names = FALSE)

	# plot of predictions: prediction on the x-axis, state on the y-axis,
	# points colored by model, one panel per year
	library(ggplot2)
	ggplot(data = combined_df,
	       mapping = aes(x = prediction, y = state, color = model_name)) +
	  geom_point() +
	  facet_wrap(facets = ~ year)