Skip to content

Instantly share code, notes, and snippets.

@RobWiederstein
Created April 11, 2020 02:30
Show Gist options
  • Save RobWiederstein/42b05dc91dbe59349368078563d92759 to your computer and use it in GitHub Desktop.
Save RobWiederstein/42b05dc91dbe59349368078563d92759 to your computer and use it in GitHub Desktop.
# Attach every package this script relies on. If any are missing, install
# them first with install.packages(pkgs).
# NOTE(review): raster is attached last; the script qualifies dplyr::select
# and dplyr::filter explicitly further down, presumably to avoid masking.
pkgs <- c(
  "ggmap", "rgdal", "rgeos", "maptools",
  "dplyr", "tidyr", "tmap", "raster"
)
lapply(pkgs, library, character.only = TRUE)
rm(pkgs)
# Read the house-district shapefile into a Spatial*DataFrame (`hd`).
hd <- readOGR(
  dsn = "./data_raw/shp/HH001M01",
  layer = "HH001M01",
  stringsAsFactors = FALSE
)
# DISTRICT is converted from text to integer so it can be used as a join key
# against the integer district numbers built later in the script.
hd@data$DISTRICT <- as.integer(hd@data$DISTRICT)
# Area of each district polygon in square kilometres.
# area() returns square metres for a longitude/latitude CRS, or squared map
# units otherwise, so the conversion factor to km^2 is 1e6 (not 1000).
# NOTE(review): this assumes the layer's map units are metres -- confirm the
# shapefile's CRS; if it is in feet a different factor is needed.
hd@data$area <- area(hd) / 1e6
# Registered voters per house district.
# Every column is read as text and converted explicitly below.
file <- "./data_raw/tabula-house_district_registrations.csv"
raw_reg <- read.csv(
  file = file,
  header = TRUE,
  sep = ",",
  colClasses = "character"
)
n_registered <- as.integer(raw_reg$Registered)
# The district number is taken from the first three characters of the
# District field, with leading zeros stripped before the integer conversion.
district_code <- substr(raw_reg$District, start = 1, stop = 3)
# Build the table in its final shape: integer join key, party shares of the
# registered voters, and the registered-voter count itself.
df <- data.frame(
  DISTRICT = as.integer(gsub("^00|^0", "", district_code)),
  pct_dem = as.integer(raw_reg$Dem) / n_registered,
  pct_rep = as.integer(raw_reg$Rep) / n_registered,
  registered = n_registered,
  stringsAsFactors = FALSE
)
rm(raw_reg, n_registered, district_code)
# Load the 2018 election results and reduce them to the winning candidates.
# Result: `df.r1` with columns rep_dist, name, party, votes, pct, DISTRICT.
file <- "./data_raw/sos//2018-election_results.csv"
df.r <- read.csv(
  file = file,
  header = TRUE,
  sep = ",",
  colClasses = "character"
)
# Keep columns 2-6 of the raw file and give them working names.
df.r <- df.r[, 2:6]
names(df.r) <- c("rep_dist", "name", "party", "votes", "pct")
# The district number is the third space-separated token of the race title,
# e.g. "State Representative, 1st Representative District" -> "1st".
# Strip the ordinal suffix and convert to integer; titles that do not follow
# this pattern become NA.
a <- vapply(strsplit(df.r$rep_dist, split = " "), "[", character(1), 3)
a <- gsub("st|nd|rd|th", "", a)
df.r$district <- as.integer(a)
# Abbreviate party labels: DEM -> D, REP -> R.
df.r$party <- gsub("DEM", "D", df.r$party)
df.r$party <- gsub("REP", "R", df.r$party)
# Filter to winners: candidates with at least 50 percent of the vote.
df.r$pct <- as.numeric(df.r$pct)
df.r1 <- dplyr::filter(df.r, pct >= 50)
# NOTE(review): row 12 is dropped by position; per the original comment this
# is the "dj johnson" row. The index depends on the input file's row order --
# verify it still points at the intended row if the data changes.
df.r1 <- df.r1[-12, ] # dj johnson
# Add back Steven Rudy (district 1) by hand.
# pct and district use the same types as the matching df.r1 columns (numeric
# and integer); as strings they would make rbind() coerce both whole columns
# to character. stringsAsFactors = FALSE keeps the text fields as character
# on R versions before 4.0 as well.
df.rudy <- data.frame(
  rep_dist = "State Representative, 1st Representative District",
  name = "Steven RUDY",
  party = "R",
  votes = "11049",
  pct = 67.0,
  district = 1L,
  stringsAsFactors = FALSE
)
df.r1 <- rbind(df.r1, df.rudy)
rm(df.r, df.rudy, a)
# Rename the key column so it matches the other tables.
names(df.r1)[6] <- "DISTRICT"
# Join voter registrations with the election winners on DISTRICT, keeping
# only the columns needed downstream, in this order.
keep_cols <- c(
  "DISTRICT", "name", "party", "votes", "pct",
  "pct_dem", "pct_rep", "registered"
)
df.1 <- merge(df, df.r1, by = "DISTRICT")[, keep_cols]
rm(df, df.r1, keep_cols)
# Attach the combined attributes to the spatial data frame.
# NOTE(review): no `by` is given, so the join key is whatever sp::merge
# infers -- presumably DISTRICT; verify.
hd <- sp::merge(hd, df.1)
# Registered voters per unit of district area.
hd@data$density <- hd@data$registered / hd@data$area
# Bin the density into n = 5 groups and relabel them from least to most
# dense.
# NOTE(review): cut_number() is not defined in this file -- presumably
# ggplot2's, attached as a dependency of ggmap; verify.
quintile_labels <- c(
  "rural",
  "rural suburban",
  "suburban",
  "dense suburban",
  "urban"
)
hd@data$quintile <- factor(
  cut_number(as.numeric(hd@data$density), n = 5),
  labels = quintile_labels
)
rm(quintile_labels)
# Write the attribute table as CSV and save the full spatial object.
file <- "./data_tidy/2018_house_district_pop_density.csv"
write.csv(hd@data, file = file, row.names = FALSE)
save(hd, file = "./data_tidy/2018_house_district_pop_density")
#create spatial polygon object
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment