# peterhurford/ballot_initiatives_model.R

## ballot_initiatives_model.R
# Install (if missing) and load libraries
# requireNamespace() only checks that a package is available; it does not
# attach it, so each package is attached exactly once by library() below.
if (!requireNamespace("dplyr", quietly = TRUE)) { install.packages("dplyr") }; library(dplyr)
if (!requireNamespace("devtools", quietly = TRUE)) { install.packages("devtools") }; library(devtools)
if (!requireNamespace("readr", quietly = TRUE)) { install.packages("readr") }; library(readr)
# recombinator is installed from GitHub; install_github() comes from devtools,
# which is attached above. library(recombinator) is required so that
# homogeneous_recombinator() is available after a fresh install.
if (!requireNamespace("recombinator", quietly = TRUE)) { install_github("robertzk/recombinator") }
library(recombinator)


# Read the spreadsheet export; download it first from
# https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
states <- read_csv("~/Downloads/AR US States.csv")


# Clean data
# - "Yes"/other answers become 1/0 indicators
# - percent signs, dollar signs and thousands separators are stripped before
#   converting the text columns to numbers
# - DTrifecta is a new 1/0 indicator for a "D" value in `Trifecta?`
states <- mutate(
  states,
  `Any ban?` = as.numeric(`Any ban?` == "Yes"),
  `% White` = as.numeric(gsub("%", "", `% White`)),
  `Average Income` = as.numeric(
    gsub(",", "", gsub("$", "", `Average Income`, fixed=TRUE))
  ),
  `Average Education` = as.numeric(gsub("%", "", `Average Education`)),
  DTrifecta = as.numeric(`Trifecta?` == "D"),
  `Ag gag law?` = as.numeric(`Ag gag law?` == "Yes"),
  `Right to Farm?` = as.numeric(`Right to Farm?` == "Yes")
)


# Check correlations
# Test the correlation between the ban indicator and each candidate predictor.
vars <- c(
  "Population", "% White", "Average Income", "Average Education", "Density",
  "Cook PVI", "DTrifecta", "538 Elasticity", "Restaurants / 1M People",
  "Overall Meat Demand", "Average Senator Score",
  "Red Meat Production per person", "Hog Slaughter per person",
  "Eggs per Person", "Ag gag law?", "Right to Farm?"
)
# cor.test() has no `use` argument (that one belongs to cor()); it already
# restricts itself to complete pairs, so no extra argument is needed.
# The result list is named by variable so each test is easy to find when printed.
lapply(setNames(vars, vars), function(var) {
  list(cor.test(states[["Any ban?"]], states[[var]]), var)
})


# Make an initial model
# `Any ban?` is a 0/1 indicator, so fit a logistic regression. The same formula
# is reused for the leave-one-out fits below so the two cannot drift apart.
ban_formula <- `Any ban?` ~ `Population` + `Density` + `Restaurants / 1M People` + `% White` + `Average Income`
summary(glm(ban_formula, family = binomial, data = states))


# Get out-of-sample data for the model
# Leave-one-out: for each state, fit on every other state and predict the
# held-out one. type = "response" returns the probability directly, so no
# manual inverse-logit is applied.
loo_probability <- vapply(seq_len(nrow(states)), function(i) {
  model <- glm(ban_formula, family = binomial, data = states[-i, ])
  predict(model, newdata = states[i, ], type = "response")[[1]]
}, numeric(1))
# Keep Probability numeric so that arrange() sorts by value rather than by the
# text representation of the number.
outputs <- data.frame(
  State = states[["State"]],
  Probability = loo_probability,
  stringsAsFactors = FALSE
)
arrange(outputs, Probability)
	# NOTE(review): from here on the file repeats the script above verbatim with
	# tab indentation; this looks like a paste artifact -- confirm and remove.
	# Install (if missing) and load libraries
	# requireNamespace() only checks that a package is available; it does not
	# attach it, so each package is attached exactly once by library() below.
	if (!requireNamespace("dplyr", quietly = TRUE)) { install.packages("dplyr") }; library(dplyr)
	if (!requireNamespace("devtools", quietly = TRUE)) { install.packages("devtools") }; library(devtools)
	if (!requireNamespace("readr", quietly = TRUE)) { install.packages("readr") }; library(readr)
	# recombinator is installed from GitHub; install_github() comes from devtools,
	# which is attached above. library(recombinator) is required so that
	# homogeneous_recombinator() is available after a fresh install.
	if (!requireNamespace("recombinator", quietly = TRUE)) { install_github("robertzk/recombinator") }
	library(recombinator)


	# Read the spreadsheet export; download it first from
	# https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
	states <- read_csv("~/Downloads/AR US States.csv")


	# Clean data
	# - "Yes"/other answers become 1/0 indicators
	# - percent signs, dollar signs and thousands separators are stripped before
	#   converting the text columns to numbers
	# - DTrifecta is a new 1/0 indicator for a "D" value in `Trifecta?`
	states <- mutate(
	  states,
	  `Any ban?` = as.numeric(`Any ban?` == "Yes"),
	  `% White` = as.numeric(gsub("%", "", `% White`)),
	  `Average Income` = as.numeric(
	    gsub(",", "", gsub("$", "", `Average Income`, fixed=TRUE))
	  ),
	  `Average Education` = as.numeric(gsub("%", "", `Average Education`)),
	  DTrifecta = as.numeric(`Trifecta?` == "D"),
	  `Ag gag law?` = as.numeric(`Ag gag law?` == "Yes"),
	  `Right to Farm?` = as.numeric(`Right to Farm?` == "Yes")
	)


	# Check correlations
	# Test the correlation between the ban indicator and each candidate predictor.
	vars <- c(
	  "Population", "% White", "Average Income", "Average Education", "Density",
	  "Cook PVI", "DTrifecta", "538 Elasticity", "Restaurants / 1M People",
	  "Overall Meat Demand", "Average Senator Score",
	  "Red Meat Production per person", "Hog Slaughter per person",
	  "Eggs per Person", "Ag gag law?", "Right to Farm?"
	)
	# cor.test() has no `use` argument (that one belongs to cor()); it already
	# restricts itself to complete pairs, so no extra argument is needed.
	# The result list is named by variable so each test is easy to find when printed.
	lapply(setNames(vars, vars), function(var) {
	  list(cor.test(states[["Any ban?"]], states[[var]]), var)
	})


	# Make an initial model
	# `Any ban?` is a 0/1 indicator, so fit a logistic regression. The same formula
	# is reused for the leave-one-out fits below so the two cannot drift apart.
	ban_formula <- `Any ban?` ~ `Population` + `Density` + `Restaurants / 1M People` + `% White` + `Average Income`
	summary(glm(ban_formula, family = binomial, data = states))


	# Get out-of-sample data for the model
	# Leave-one-out: for each state, fit on every other state and predict the
	# held-out one. type = "response" returns the probability directly, so no
	# manual inverse-logit is applied.
	loo_probability <- vapply(seq_len(nrow(states)), function(i) {
	  model <- glm(ban_formula, family = binomial, data = states[-i, ])
	  predict(model, newdata = states[i, ], type = "response")[[1]]
	}, numeric(1))
	# Keep Probability numeric so that arrange() sorts by value rather than by the
	# text representation of the number.
	outputs <- data.frame(
	  State = states[["State"]],
	  Probability = loo_probability,
	  stringsAsFactors = FALSE
	)
	arrange(outputs, Probability)