Skip to content

Instantly share code, notes, and snippets.

@dmil
Last active November 1, 2016 16:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dmil/79195db4a306916ea41d8e06aa25bd54 to your computer and use it in GitHub Desktop.
Save dmil/79195db4a306916ea41d8e06aa25bd54 to your computer and use it in GitHub Desktop.
parse google polls
library(dplyr)
library(tidyr)
# National summary: weighted share of each answer in the
# Question..2.Answer column of the poll export.
# The working directory is switched to the downloads folder so that the
# relative CSV paths used in this script resolve there.
setwd("~/Downloads")
filename <- "GS_Election_Poll_20161101"
df <- read.csv(paste0(filename, ".csv"))

# Sum the survey weights per answer, then express each total as a fraction
# of the overall weight.
grouped <- df %>% group_by(Question..2.Answer)
summary_df <- grouped %>% summarise(total = sum(Weight))
summary_df_mutated <- summary_df %>% mutate(pct = total / sum(total))

# Show the resulting table.
summary_df_mutated
# Weekly Trends Tracker
# Translate a geography code into a full state name.
#
# Characters 4-5 of `geo` are taken as a two-letter postal abbreviation
# (presumably codes look like "US-CA" -- confirm against the poll export)
# and looked up in R's built-in `state.abb` / `state.name` vectors, with
# "DC" added because those vectors only cover the 50 states.
#
# geo: character vector (or factor) of geography codes.
# Returns a character vector the same length as `geo`. Entries whose
# abbreviation is not recognised (including codes too short to contain
# one, and NA) come back as NA.
get_state <- function(geo) {
  abbrev <- substr(as.character(geo), 4, 5)
  # Exact lookup with match(): searching with grep() would treat the
  # abbreviation as a regex pattern, so an empty string would match every
  # state and an unknown code would yield a zero-length result.
  abbrev_lookup <- c(state.abb, "DC")
  name_lookup <- c(state.name, "District of Columbia")
  name_lookup[match(abbrev, abbrev_lookup)]
}
# Per-geography breakdown: one output row per geography with its respondent
# count and the weighted percentage for each answer.
df <- read.csv(paste0(filename, ".csv"))

# Keep only rows with a usable, non-zero weight. A bare `Weight == 0`
# comparison yields NA for a missing Weight, and indexing data-frame rows
# with NA inserts an all-NA row, so missing weights are excluded explicitly.
df <- df[!is.na(df$Weight) & df$Weight != 0, ]

# Total weight and respondent count per (geography, answer) pair.
grouped <- group_by(df, Geography, Question..2.Answer)
summary_df <- summarise(grouped, total = sum(Weight), n = n())

# summarise() drops only the last grouping level, so the result is still
# grouped by Geography: sum(total) and sum(n) below are per-geography sums.
summary_df_mutated <- mutate(summary_df, pct = total / sum(total))
summary_df_mutated <- subset(summary_df_mutated, select = -c(total))
summary_df_mutated <- mutate(summary_df_mutated, n = sum(n))

# One column per answer. Grouping is no longer needed once the table is
# wide, so drop it before editing columns.
x <- ungroup(spread(summary_df_mutated, Question..2.Answer, pct))

# Fix the column order of the output. This selection fails if any of the
# listed answers never occurs in the data.
headers <- c("Geography", "n", "Clinton", "Trump", "Johnson", "Undecided", "Other")
x <- x[headers]

# Replace geography codes with state names. The lookup is applied one code
# at a time and anything that does not resolve to exactly one name becomes
# NA instead of being deparsed into a string such as "character(0)".
x$Geography <- vapply(
  as.character(x$Geography),
  function(geo) {
    state <- get_state(geo)
    if (length(state) == 1L) as.character(state) else NA_character_
  },
  character(1),
  USE.NAMES = FALSE
)

# Convert the answer shares from fractions to percentages.
for (answer in setdiff(headers, c("Geography", "n"))) {
  x[[answer]] <- x[[answer]] * 100
}

write.table(x, paste0(filename, "_out.csv"), sep = ",", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment