# dmil/google.R

## google.R
library(dplyr)
library(tidyr)

# Locate and load the raw poll export. `filename` (without extension) is
# reused further down to re-read the data and to name the output file.
setwd("~/Downloads")
filename <- "GS_Election_Poll_20161101"
df <- read.csv(paste0(filename, ".csv"))

# Sum the respondent weights for each answer to question 2.
grouped <- df %>% group_by(Question..2.Answer)
summary_df <- grouped %>% summarise(total = sum(Weight))

# Express each answer's weighted total as a fraction of the total across all
# answers, then show the table.
summary_df_mutated <- summary_df %>% mutate(pct = total / sum(total))
summary_df_mutated

# Weekly Trends Tracker
# Translate a geography code into a full state name.
#
# The two-letter postal abbreviation is read from characters 4-5 of `geo`
# (e.g. "US-CA" -> "CA") and looked up by exact match against the built-in
# `state.abb` / `state.name` vectors, with "DC" added because the built-in
# vectors cover only the 50 states.
#
# Args:
#   geo: character vector (or factor) of geography codes; may have any length.
#
# Returns:
#   A character vector the same length as `geo` holding the state names.
#   Codes that are NA or do not match a known abbreviation yield NA rather
#   than a zero-length result, so callers always get one value per input.
get_state <- function(geo) {
  abbrev <- substr(as.character(geo), 4, 5)
  known_abbrevs <- c(state.abb, "DC")
  known_names <- c(state.name, "District of Columbia")
  # match() compares whole strings; a regex search could match several
  # abbreviations or none at all.
  known_names[match(abbrev, known_abbrevs)]
}
# Re-read the raw export so this section starts from the unfiltered data.
df <- read.csv(paste0(filename, ".csv"))

# Keep only rows with a usable, non-zero weight. The explicit NA check matters:
# `df$Weight == 0` is NA for a missing weight, and indexing rows with NA yields
# all-NA rows that would break the state lookup below.
df <- df[!is.na(df$Weight) & df$Weight != 0, ]

# Weighted total and raw row count for every (Geography, answer) pair.
grouped <- group_by(df, Geography, Question..2.Answer)
summary_df <- summarise(grouped, total = sum(Weight), n = n())

# summarise() drops the innermost grouping level, so the sums below are taken
# within each Geography: `pct` is the answer's share of that geography's
# weight and `n` becomes the geography's total row count.
summary_df_mutated <- mutate(summary_df, pct = total / sum(total), n = sum(n))
summary_df_mutated <- select(summary_df_mutated, -total)

# One row per geography, one column per answer; keep a fixed column order.
x <- spread(summary_df_mutated, Question..2.Answer, pct)
headers <- c("Geography", "n", "Clinton", "Trump", "Johnson", "Undecided", "Other")
x <- x[headers]

# Replace geography codes with state names. vapply() insists on exactly one
# name per code, so an unmappable code cannot silently end up in the output
# as the text "character(0)".
x$Geography <- vapply(as.character(x$Geography), get_state, character(1),
                      USE.NAMES = FALSE)

# Convert the answer shares from fractions to percentages.
for (answer_col in setdiff(headers, c("Geography", "n"))) {
  x[[answer_col]] <- x[[answer_col]] * 100
}

write.table(x, paste0(filename, "_out.csv"), sep = ",", row.names = FALSE)
	library(dplyr)
	library(tidyr)

	# Locate and load the raw poll export. `filename` (without extension) is
	# reused further down to re-read the data and to name the output file.
	setwd("~/Downloads")
	filename <- "GS_Election_Poll_20161101"
	df <- read.csv(paste0(filename, ".csv"))

	# Sum the respondent weights for each answer to question 2.
	grouped <- df %>% group_by(Question..2.Answer)
	summary_df <- grouped %>% summarise(total = sum(Weight))

	# Express each answer's weighted total as a fraction of the total across all
	# answers, then show the table.
	summary_df_mutated <- summary_df %>% mutate(pct = total / sum(total))
	summary_df_mutated

	# Weekly Trends Tracker
	# Translate a geography code into a full state name.
	#
	# The two-letter postal abbreviation is read from characters 4-5 of `geo`
	# (e.g. "US-CA" -> "CA") and looked up by exact match against the built-in
	# `state.abb` / `state.name` vectors, with "DC" added because the built-in
	# vectors cover only the 50 states.
	#
	# Args:
	#   geo: character vector (or factor) of geography codes; may have any length.
	#
	# Returns:
	#   A character vector the same length as `geo` holding the state names.
	#   Codes that are NA or do not match a known abbreviation yield NA rather
	#   than a zero-length result, so callers always get one value per input.
	get_state <- function(geo) {
	  abbrev <- substr(as.character(geo), 4, 5)
	  known_abbrevs <- c(state.abb, "DC")
	  known_names <- c(state.name, "District of Columbia")
	  # match() compares whole strings; a regex search could match several
	  # abbreviations or none at all.
	  known_names[match(abbrev, known_abbrevs)]
	}
	# Re-read the raw export so this section starts from the unfiltered data.
	df <- read.csv(paste0(filename, ".csv"))

	# Keep only rows with a usable, non-zero weight. The explicit NA check matters:
	# `df$Weight == 0` is NA for a missing weight, and indexing rows with NA yields
	# all-NA rows that would break the state lookup below.
	df <- df[!is.na(df$Weight) & df$Weight != 0, ]

	# Weighted total and raw row count for every (Geography, answer) pair.
	grouped <- group_by(df, Geography, Question..2.Answer)
	summary_df <- summarise(grouped, total = sum(Weight), n = n())

	# summarise() drops the innermost grouping level, so the sums below are taken
	# within each Geography: `pct` is the answer's share of that geography's
	# weight and `n` becomes the geography's total row count.
	summary_df_mutated <- mutate(summary_df, pct = total / sum(total), n = sum(n))
	summary_df_mutated <- select(summary_df_mutated, -total)

	# One row per geography, one column per answer; keep a fixed column order.
	x <- spread(summary_df_mutated, Question..2.Answer, pct)
	headers <- c("Geography", "n", "Clinton", "Trump", "Johnson", "Undecided", "Other")
	x <- x[headers]

	# Replace geography codes with state names. vapply() insists on exactly one
	# name per code, so an unmappable code cannot silently end up in the output
	# as the text "character(0)".
	x$Geography <- vapply(as.character(x$Geography), get_state, character(1),
	                      USE.NAMES = FALSE)

	# Convert the answer shares from fractions to percentages.
	for (answer_col in setdiff(headers, c("Geography", "n"))) {
	  x[[answer_col]] <- x[[answer_col]] * 100
	}

	write.table(x, paste0(filename, "_out.csv"), sep = ",", row.names = FALSE)