elliottmorris/trump_approval_effects.R

## trump_approval_effects.R
##################################
# Code for house-correcting Trump approval polls
# Written by Alexander Agadjanian and G. Elliott Morris
# for use on Elliott's blog, TheCrosstab.com.
# @a_agadjanian @GElliottMorris
#
# Github Gist lives at https://gist.github.com/elliottmorris/905bd514110ea7d0b599cfca8a8e7332
####################################
# libraries ####

rm(list = ls())
#setwd("/Users/Victor/Desktop/Blog-data/excel files/post election/")
library(matrixStats)
library(ggrepel)
library(maps)
library(ggthemes)
library(stringr)
library(gridExtra)
library(survey)
library(haven)
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(broom)
library(plotly)
options(stringsAsFactors = FALSE)
library(stargazer)
library(utils)
library(ggthemes)
library(ggrepel)
library(png)
library(grid)
library(gridExtra)
library(knitr)
library(reshape)
source('~/Desktop/theme_elliott.R')
source('~/Desktop/theme_elliott.R')

exponent_weight <- function(i) {
  exp(-0.2*i)
}

weight_data <- data.frame("Days" = c(1:14),"Weights" = exponent_weight(c(1:14)))
weight_data

# data ####

download.file("http://elections.huffingtonpost.com/pollster/api/v2/questions/00c%20-Pres-45-Trump%20-%20Job%20Approval%20-%20National/poll-responses-clean.tsv",
              "approvalrating.tsv")
df <- read.table("approvalrating.tsv", sep = "\t", header = TRUE)

df1 <- df %>%
  filter(sample_subpopulation == "Likely Voters" |
           sample_subpopulation == "Adults" |
           sample_subpopulation == "Registered Voters")
write.csv(df1,"cleaned_polls.csv",row.names = FALSE)
polls <- df1 %>%
  mutate(net.approval = Approve - Disapprove,
         population = ifelse(sample_subpopulation == "Adults", "1. Adults",
                             ifelse(sample_subpopulation == "Registered Voters", "2. Registered Voters",
                                    ifelse(sample_subpopulation == "Likely Voters", "3. Likely Voters", NA))),
         mode = ifelse(mode == "Live Phone", "1. Live Phone",
                       ifelse(mode == "IVR/Online", "2. IVR/Online",
                              ifelse(mode == "Internet", "3. Internet", NA))),
         field.time = difftime(end_date, start_date),
         sample = observations,
         since.inaug = difftime(end_date, "2017-01-20"),
         undecided.dk = 100 - (Approve + Disapprove))

for (i in unique(polls$survey_house)){
  polls[[i]] <- ifelse(polls$survey_house == i, paste("2. ",i),"1. Other Polls")

}

#################### Approval & Disapproval ################################

#  + field.time + undecided.dk

approval.lm <- lm(Approve ~ population + mode + since.inaug  + survey_house, data = polls)
summary(approval.lm)
df.approval.lm <- tidy(approval.lm)

# loop through to pull coefficients **********************

# creating vector that contains index term
temp <- polls[21:length(polls)]

pollsters <- colnames(temp)

# only want pollsters with 3+ polls
for (i in pollsters){
  if(nrow(polls[polls$survey_house == i,]) < 2){
    pollsters <- pollsters[pollsters != i]
  }
}

# creating empty data frame to which house effects will be attached
house <- data.frame(term = character(),
                    estimate = numeric())

# looping through regressions for each individual pollster to get house effect for each
for (i in pollsters) {

  approval.lm <- lm(polls$Approve ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]])
  summary(approval.lm)
  df.approval.lm <- tidy(approval.lm) %>%
    select(term, estimate)

  df.approval.lm <- df.approval.lm[nrow(df.approval.lm),]
  if (df.approval.lm[1,]$term == "polls$undecided.dk"){next}
  print(paste(i,df.approval.lm))
  house <- rbind(house, df.approval.lm)

}

# fixing data frame that contains house effects
house$term <- str_replace_all(house$term, "polls\\[\\[i\\]\\]2.", "")
house <- house %>%
  select("Pollster" = term, "Effect" = estimate)


# order
house <- house[order(house$Effect),]
house$Pollster <- factor(house$Pollster, levels = house$Pollster[order(house$Effect)])
house$Pollster  # notice the changed order of factor levels
house$Bias <- ifelse(house$Effect <=0,"Democratic Bias","Republican Bias")


#  + field.time + undecided.dk

disapproval.lm <- lm(Disapprove ~ population + mode + since.inaug  + survey_house, data = polls)
summary(disapproval.lm)
disdf.approval.lm <- tidy(approval.lm)

# loop through to pull coefficients **********************

# creating vector that contains index term
temp <- polls[21:length(polls)]

pollsters <- colnames(temp)

# creating empty data frame to which house effects will be attached
house_2 <- data.frame(term = character(),
                    estimate = numeric())

# looping through regressions for each individual pollster to get house effect for each
for (i in pollsters) {

  disapproval.lm <- lm(polls$Disapprove ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]])
  summary(disapproval.lm)
  df.disapproval.lm <- tidy(disapproval.lm) %>%
    select(term, estimate)

  df.disapproval.lm <- df.disapproval.lm[nrow(df.disapproval.lm),]
  if (df.disapproval.lm[1,]$term == "polls$undecided.dk"){next}
  print(paste(i,df.approval.lm))
  house_2 <- rbind(house_2, df.disapproval.lm)

}

# fixing data frame that contains house effects
house_2$term <- str_replace_all(house_2$term, "polls\\[\\[i\\]\\]2.", "")
house_2 <- house_2 %>%
  select("Pollster" = term, "Effect" = estimate)


# order
house_2 <- house_2[order(house_2$Effect),]
house_2$Pollster <- factor(house_2$Pollster, levels = house_2$Pollster[order(house_2$Effect)])
house_2$Pollster  # notice the changed order of factor levels
house_2$Bias <- ifelse(house_2$Effect <=0,"Democratic Bias","Republican Bias")
names(house_2) <- c("Pollster","D_Effect","Bias")
house <- left_join(house,
                   house_2 %>% select(-Bias),
                   by="Pollster")

# creating graph for pollster effects ####
pollster_effect<- ggplot() +
  geom_bar(data = house, aes(x = reorder(Pollster,Effect), y = Effect, fill = Effect),col = "#616161", stat = "identity") +
  theme_elliott() +
  labs(title = "House Effects for Trump Job Approval Polls",
       subtitle = "After taking mode, population, and date of poll into account.\nOnly computed for polling firms that have released more than 2 polls of Trump's Job Approval",
       x = "Pollster",
       y = "House Effect") +
  scale_fill_gradient2(low = "blue",mid="white",high="red") +
  scale_y_continuous(breaks=c(-10,-5,0,5,10),labels=c("-10%","-5%","0","5%","10%")) +
  coord_flip()

grid.arrange(pollster_effect,my_g,heights=c(9, .5))

png("Trump_approval.png",unit="in",height=6,width=8,res=300)

grid.arrange(pollster_effect,my_g,heights=c(9, .5))

dev.off()

kable(house,align="l",format="html",digits=2,row.names = FALSE)
## stargazing


approval.lm1 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk, data = polls)

approval.lm2 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls)

approval.lm3 <- lm(net.approval ~ population + mode + since.inaug + survey_house, data = polls)

# approve
approval.lm4 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk, data = polls)

approval.lm5 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls)

approval.lm6 <- lm(Approve ~ population + mode + since.inaug + survey_house, data = polls)


text<- stargazer(approval.lm1,approval.lm2,approval.lm3,approval.lm4,approval.lm5,approval.lm6, type = "html")


#box plots
polling <-  df %>%
  filter(sample_subpopulation == "Likely Voters" |
           sample_subpopulation == "Adults" |
           sample_subpopulation == "Registered Voters")


mode_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(mode,Approve), y= Approve, col = mode)) +
  geom_boxplot(width=.4) +
  theme_elliott()  +
  coord_flip()+
  coord_flip() +
labs(title = "Mode Effects for Trump Job Approval Polls",
     subtitle = "A simple comparison of averages over the past 3 weeks",
     y = "Approval Rating (%)",
     x = "Pollster")

grid.arrange(mode_effect,my_g,heights=c(9, .5))

png("Trump_mode.png",unit="in",height=6,width=8,res=300)

grid.arrange(mode_effect,my_g,heights=c(9, .5))

dev.off()

pop_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(sample_subpopulation,Approve), y= Approve, color = sample_subpopulation)) +
  geom_boxplot(width=.4) +
  theme_elliott()  +
  coord_flip() +
  labs(title = "Population Effects for Trump Job Approval Polls",
       subtitle = "A simple comparison of averages over the past 3 weeks",
       y = "Approval Rating (%)",
       x = "Pollster")
grid.arrange(pop_effect,my_g,heights=c(9, .5))

png("Trump_population.png",unit="in",height=6,width=8,res=300)

grid.arrange(pop_effect,my_g,heights=c(9, .5))

dev.off()

avg_compare <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,],
             aes(x = reorder(survey_house,Approve), y= Approve,color=survey_house)) +
  geom_boxplot() +
  theme_elliott()  +
  coord_flip()  +
  labs(title = "House Effects for Trump Job Approval Polls",
       subtitle = "A simple comparison of averages over the past 3 weeks",
       y = "Approval Rating (%)",
       x = "Pollster")

grid.arrange(avg_compare,my_g,heights=c(9, .5))

png("Trump_house_avg.png",unit="in",height=6,width=8,res=300)

grid.arrange(avg_compare,my_g,heights=c(9, .5))

dev.off()


##########  why not try a correction?  ###################
df2 <- df1 %>% mutate(Adjusted = NA,Adjusted_Dis = NA)
house$Pollster_split <- substr(as.character(house$Pollster),3,10000)

for (i in unique(df2$survey_house)){
  if(nrow(house[house$Pollster_split == i,])>0){
    effect <- as.numeric(house[house$Pollster_split == i,]$Effect)}else{effect <- 0}

  df2[df2$survey_house == i,]$Adjusted = df2[df2$survey_house == i,]$Approve - effect

  if(nrow(house[house$Pollster_split == i,])>0){
    dis_effect <- as.numeric(house[house$Pollster_split == i,]$D_Effect)}else{dis_effect <- 0}

  df2[df2$survey_house == i,]$Adjusted_Dis = df2[df2$survey_house == i,]$Disapprove - dis_effect
}

df2$Net <- df2$Adjusted - df2$Adjusted_Dis
df2<-df2[order(df2$end_date),]

# function for WMA

ma <- function(x,n=7){stats::filter(x,rep(1/n,n), sides=1)}

df3 <- as.data.frame(df2 %>%
  group_by(end_date) %>%
  summarise(Adjusted = mean(Adjusted),
            Adjusted_Dis = mean(Adjusted_Dis),
            Net = mean(Net)))

# for i in date
# all polls in last 2 weeks
# get days since date
# get weights
# make weighted average
# save average to that row

df3$M_Adjusted <- NA
df3$M_Adjusted_Dis <- NA

for (i in unique(df3$end_date)){
  temp <- df3 %>%
    filter((as.Date(end_date) > as.Date(i) - 15) & as.Date(end_date)<= as.Date(i)) %>%
    mutate(Days = as.numeric(difftime(as.Date(Sys.Date()),ymd(end_date))),
           weight = exponent_weight(Days))
  df3[df3$end_date == i,]$M_Adjusted = weighted.mean(temp$Adjusted,temp$weight)
  df3[df3$end_date == i,]$M_Adjusted_Dis = weighted.mean(temp$Adjusted_Dis,temp$weight)

}


df3$M_Net = df3$M_Adjusted - df3$M_Adjusted_Dis


df3 <- df3[complete.cases(df3),]
names(df3) <- as.character(names(df3))

df3.m <- melt(df3,id.vars = c("end_date","Adjusted","Adjusted_Dis","Net","M_Net"))

df3.m$variable <- gsub("M_Adjusted","Approve",df3.m$variable)
df3.m$variable <- gsub("Approve_Dis","Disapprove",df3.m$variable)

# need the points for polls
df2.m <- melt(df2, id.vars = c(names(df2)[!(names(df2) %in% c("Adjusted","Adjusted_Dis"))]))
df2.m$variable <- gsub("Adjusted","Approve",df2.m$variable)
df2.m$variable <- gsub("Approve_Dis","Disapprove",df2.m$variable)

#avg today
disapprove <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted_Dis
approve <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted


trump_polls_ggplot <- ggplot(df3.m,
                             aes(x=as.Date(end_date),y=value,col = variable))+
  geom_point(data=df2.m,
             aes(x=as.Date(end_date),y=value,col=variable,fill=survey_house,
                 label=value,label2=survey_house),
             shape=1,alpha=.8)+
  geom_line(size=1) +
  theme_elliott() +
  labs(x="Date",
       y="",
       title = "Trump Job Approval Polls",
       subtitle="Polls corrected for house effects and averaged with\nhigher weight given to most recent polls") +
  geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7,
                 y=disapprove,
                 label=paste0(round(disapprove,1),"% Disapprove"),col="Disapprove")) +
  geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7,
                 y=approve,
                 label=paste0(round(approve,1),"% Approve"),col="Approve")) +
  scale_color_manual("",values = c("Approve" = "blue","Disapprove"="red")) +
  coord_cartesian(xlim = c(as.Date("2017-01-20"),max(as.Date(df3$end_date))+15),
                  ylim = c(35,65))

  #geom_smooth(data=df2,aes(x=as.Date(end_date),y=Net),col="black",se=FALSE,span=.3)

grid.arrange(trump_polls_ggplot,my_g,heights=c(9, .5))


png("MasterPlot.png",unit="in",height=15,width=15,res=400)

grid.arrange(trump_polls_ggplot,

             grid.arrange(
               pollster_effect,
               avg_compare,
               mode_effect,
               pop_effect,
               ncol = 2),

             my_g,
             heights=c(5,9, .5)
)
dev.off()

house$Pollster = substr(as.character(house$Pollster),3,100)

left_join(df2 %>%
            select("Pollster" = survey_house,
                   "Date" = end_date,
                   Approve,
                   Disapprove,
                   "Approve (Adj.)" = Adjusted,
                   "Disapprove (Adj.)"= Adjusted_Dis) %>%
            arrange(desc(Date)),
          house %>% select(Pollster,"House Effect" = Effect),by="Pollster")
	##################################
	# Code for house-correcting Trump approval polls
	# Written by Alexander Agadjanian and G. Elliott Morris
	# for use on Elliott's blog, TheCrosstab.com.
	# @a_agadjanian @GElliottMorris
	#
	# Github Gist lives at https://gist.github.com/elliottmorris/905bd514110ea7d0b599cfca8a8e7332
	####################################
	# libraries ####

	rm(list = ls())
	#setwd("/Users/Victor/Desktop/Blog-data/excel files/post election/")
	library(matrixStats)
	library(ggrepel)
	library(maps)
	library(ggthemes)
	library(stringr)
	library(gridExtra)
	library(survey)
	library(haven)
	library(dplyr)
	library(tidyr)
	library(ggplot2)
	library(lubridate)
	library(broom)
	library(plotly)
	options(stringsAsFactors = FALSE)
	library(stargazer)
	library(utils)
	library(ggthemes)
	library(ggrepel)
	library(png)
	library(grid)
	library(gridExtra)
	library(knitr)
	library(reshape)
	source('~/Desktop/theme_elliott.R')
	source('~/Desktop/theme_elliott.R')

	exponent_weight <- function(i) {
	exp(-0.2*i)
	}

	weight_data <- data.frame("Days" = c(1:14),"Weights" = exponent_weight(c(1:14)))
	weight_data

	# data ####

	download.file("http://elections.huffingtonpost.com/pollster/api/v2/questions/00c%20-Pres-45-Trump%20-%20Job%20Approval%20-%20National/poll-responses-clean.tsv",
	"approvalrating.tsv")
	df <- read.table("approvalrating.tsv", sep = "\t", header = TRUE)

	df1 <- df %>%
	filter(sample_subpopulation == "Likely Voters" \|
	sample_subpopulation == "Adults" \|
	sample_subpopulation == "Registered Voters")
	write.csv(df1,"cleaned_polls.csv",row.names = FALSE)
	polls <- df1 %>%
	mutate(net.approval = Approve - Disapprove,
	population = ifelse(sample_subpopulation == "Adults", "1. Adults",
	ifelse(sample_subpopulation == "Registered Voters", "2. Registered Voters",
	ifelse(sample_subpopulation == "Likely Voters", "3. Likely Voters", NA))),
	mode = ifelse(mode == "Live Phone", "1. Live Phone",
	ifelse(mode == "IVR/Online", "2. IVR/Online",
	ifelse(mode == "Internet", "3. Internet", NA))),
	field.time = difftime(end_date, start_date),
	sample = observations,
	since.inaug = difftime(end_date, "2017-01-20"),
	undecided.dk = 100 - (Approve + Disapprove))

	for (i in unique(polls$survey_house)){
	polls[[i]] <- ifelse(polls$survey_house == i, paste("2. ",i),"1. Other Polls")

	}

	#################### Approval & Disapproval ################################

	# + field.time + undecided.dk

	approval.lm <- lm(Approve ~ population + mode + since.inaug + survey_house, data = polls)
	summary(approval.lm)
	df.approval.lm <- tidy(approval.lm)

	# loop through to pull coefficients **********************

	# creating vector that contains index term
	temp <- polls[21:length(polls)]

	pollsters <- colnames(temp)

	# only want pollsters with 3+ polls
	for (i in pollsters){
	if(nrow(polls[polls$survey_house == i,]) < 2){
	pollsters <- pollsters[pollsters != i]
	}
	}

	# creating empty data frame to which house effects will be attached
	house <- data.frame(term = character(),
	estimate = numeric())

	# looping through regressions for each individual pollster to get house effect for each
	for (i in pollsters) {

	approval.lm <- lm(polls$Approve ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]])
	summary(approval.lm)
	df.approval.lm <- tidy(approval.lm) %>%
	select(term, estimate)

	df.approval.lm <- df.approval.lm[nrow(df.approval.lm),]
	if (df.approval.lm[1,]$term == "polls$undecided.dk"){next}
	print(paste(i,df.approval.lm))
	house <- rbind(house, df.approval.lm)

	}

	# fixing data frame that contains house effects
	house$term <- str_replace_all(house$term, "polls\\[\\[i\\]\\]2.", "")
	house <- house %>%
	select("Pollster" = term, "Effect" = estimate)


	# order
	house <- house[order(house$Effect),]
	house$Pollster <- factor(house$Pollster, levels = house$Pollster[order(house$Effect)])
	house$Pollster # notice the changed order of factor levels
	house$Bias <- ifelse(house$Effect <=0,"Democratic Bias","Republican Bias")


	# + field.time + undecided.dk

	disapproval.lm <- lm(Disapprove ~ population + mode + since.inaug + survey_house, data = polls)
	summary(disapproval.lm)
	disdf.approval.lm <- tidy(approval.lm)

	# loop through to pull coefficients **********************

	# creating vector that contains index term
	temp <- polls[21:length(polls)]

	pollsters <- colnames(temp)

	# creating empty data frame to which house effects will be attached
	house_2 <- data.frame(term = character(),
	estimate = numeric())

	# looping through regressions for each individual pollster to get house effect for each
	for (i in pollsters) {

	disapproval.lm <- lm(polls$Disapprove ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]])
	summary(disapproval.lm)
	df.disapproval.lm <- tidy(disapproval.lm) %>%
	select(term, estimate)

	df.disapproval.lm <- df.disapproval.lm[nrow(df.disapproval.lm),]
	if (df.disapproval.lm[1,]$term == "polls$undecided.dk"){next}
	print(paste(i,df.approval.lm))
	house_2 <- rbind(house_2, df.disapproval.lm)

	}

	# fixing data frame that contains house effects
	house_2$term <- str_replace_all(house_2$term, "polls\\[\\[i\\]\\]2.", "")
	house_2 <- house_2 %>%
	select("Pollster" = term, "Effect" = estimate)


	# order
	house_2 <- house_2[order(house_2$Effect),]
	house_2$Pollster <- factor(house_2$Pollster, levels = house_2$Pollster[order(house_2$Effect)])
	house_2$Pollster # notice the changed order of factor levels
	house_2$Bias <- ifelse(house_2$Effect <=0,"Democratic Bias","Republican Bias")
	names(house_2) <- c("Pollster","D_Effect","Bias")
	house <- left_join(house,
	house_2 %>% select(-Bias),
	by="Pollster")

	# creating graph for pollster effects ####
	pollster_effect<- ggplot() +
	geom_bar(data = house, aes(x = reorder(Pollster,Effect), y = Effect, fill = Effect),col = "#616161", stat = "identity") +
	theme_elliott() +
	labs(title = "House Effects for Trump Job Approval Polls",
	subtitle = "After taking mode, population, and date of poll into account.\nOnly computed for polling firms that have released more than 2 polls of Trump's Job Approval",
	x = "Pollster",
	y = "House Effect") +
	scale_fill_gradient2(low = "blue",mid="white",high="red") +
	scale_y_continuous(breaks=c(-10,-5,0,5,10),labels=c("-10%","-5%","0","5%","10%")) +
	coord_flip()

	grid.arrange(pollster_effect,my_g,heights=c(9, .5))

	png("Trump_approval.png",unit="in",height=6,width=8,res=300)

	grid.arrange(pollster_effect,my_g,heights=c(9, .5))

	dev.off()

	kable(house,align="l",format="html",digits=2,row.names = FALSE)
	## stargazing


	approval.lm1 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk, data = polls)

	approval.lm2 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls)

	approval.lm3 <- lm(net.approval ~ population + mode + since.inaug + survey_house, data = polls)

	# approve
	approval.lm4 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk, data = polls)

	approval.lm5 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls)

	approval.lm6 <- lm(Approve ~ population + mode + since.inaug + survey_house, data = polls)


	text<- stargazer(approval.lm1,approval.lm2,approval.lm3,approval.lm4,approval.lm5,approval.lm6, type = "html")





	#box plots
	polling <- df %>%
	filter(sample_subpopulation == "Likely Voters" \|
	sample_subpopulation == "Adults" \|
	sample_subpopulation == "Registered Voters")


	mode_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(mode,Approve), y= Approve, col = mode)) +
	geom_boxplot(width=.4) +
	theme_elliott() +
	coord_flip()+
	coord_flip() +
	labs(title = "Mode Effects for Trump Job Approval Polls",
	subtitle = "A simple comparison of averages over the past 3 weeks",
	y = "Approval Rating (%)",
	x = "Pollster")

	grid.arrange(mode_effect,my_g,heights=c(9, .5))

	png("Trump_mode.png",unit="in",height=6,width=8,res=300)

	grid.arrange(mode_effect,my_g,heights=c(9, .5))

	dev.off()

	pop_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(sample_subpopulation,Approve), y= Approve, color = sample_subpopulation)) +
	geom_boxplot(width=.4) +
	theme_elliott() +
	coord_flip() +
	labs(title = "Population Effects for Trump Job Approval Polls",
	subtitle = "A simple comparison of averages over the past 3 weeks",
	y = "Approval Rating (%)",
	x = "Pollster")
	grid.arrange(pop_effect,my_g,heights=c(9, .5))

	png("Trump_population.png",unit="in",height=6,width=8,res=300)

	grid.arrange(pop_effect,my_g,heights=c(9, .5))

	dev.off()

	avg_compare <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,],
	aes(x = reorder(survey_house,Approve), y= Approve,color=survey_house)) +
	geom_boxplot() +
	theme_elliott() +
	coord_flip() +
	labs(title = "House Effects for Trump Job Approval Polls",
	subtitle = "A simple comparison of averages over the past 3 weeks",
	y = "Approval Rating (%)",
	x = "Pollster")

	grid.arrange(avg_compare,my_g,heights=c(9, .5))

	png("Trump_house_avg.png",unit="in",height=6,width=8,res=300)

	grid.arrange(avg_compare,my_g,heights=c(9, .5))

	dev.off()



	########## why not try a correction? ###################
	df2 <- df1 %>% mutate(Adjusted = NA,Adjusted_Dis = NA)
	house$Pollster_split <- substr(as.character(house$Pollster),3,10000)

	for (i in unique(df2$survey_house)){
	if(nrow(house[house$Pollster_split == i,])>0){
	effect <- as.numeric(house[house$Pollster_split == i,]$Effect)}else{effect <- 0}

	df2[df2$survey_house == i,]$Adjusted = df2[df2$survey_house == i,]$Approve - effect

	if(nrow(house[house$Pollster_split == i,])>0){
	dis_effect <- as.numeric(house[house$Pollster_split == i,]$D_Effect)}else{dis_effect <- 0}

	df2[df2$survey_house == i,]$Adjusted_Dis = df2[df2$survey_house == i,]$Disapprove - dis_effect
	}

	df2$Net <- df2$Adjusted - df2$Adjusted_Dis
	df2<-df2[order(df2$end_date),]

	# function for WMA

	ma <- function(x,n=7){stats::filter(x,rep(1/n,n), sides=1)}

	df3 <- as.data.frame(df2 %>%
	group_by(end_date) %>%
	summarise(Adjusted = mean(Adjusted),
	Adjusted_Dis = mean(Adjusted_Dis),
	Net = mean(Net)))

	# for i in date
	# all polls in last 2 weeks
	# get days since date
	# get weights
	# make weighted average
	# save average to that row

	df3$M_Adjusted <- NA
	df3$M_Adjusted_Dis <- NA

	for (i in unique(df3$end_date)){
	temp <- df3 %>%
	filter((as.Date(end_date) > as.Date(i) - 15) & as.Date(end_date)<= as.Date(i)) %>%
	mutate(Days = as.numeric(difftime(as.Date(Sys.Date()),ymd(end_date))),
	weight = exponent_weight(Days))
	df3[df3$end_date == i,]$M_Adjusted = weighted.mean(temp$Adjusted,temp$weight)
	df3[df3$end_date == i,]$M_Adjusted_Dis = weighted.mean(temp$Adjusted_Dis,temp$weight)

	}


	df3$M_Net = df3$M_Adjusted - df3$M_Adjusted_Dis


	df3 <- df3[complete.cases(df3),]
	names(df3) <- as.character(names(df3))

	df3.m <- melt(df3,id.vars = c("end_date","Adjusted","Adjusted_Dis","Net","M_Net"))

	df3.m$variable <- gsub("M_Adjusted","Approve",df3.m$variable)
	df3.m$variable <- gsub("Approve_Dis","Disapprove",df3.m$variable)

	# need the points for polls
	df2.m <- melt(df2, id.vars = c(names(df2)[!(names(df2) %in% c("Adjusted","Adjusted_Dis"))]))
	df2.m$variable <- gsub("Adjusted","Approve",df2.m$variable)
	df2.m$variable <- gsub("Approve_Dis","Disapprove",df2.m$variable)

	#avg today
	disapprove <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted_Dis
	approve <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted


	trump_polls_ggplot <- ggplot(df3.m,
	aes(x=as.Date(end_date),y=value,col = variable))+
	geom_point(data=df2.m,
	aes(x=as.Date(end_date),y=value,col=variable,fill=survey_house,
	label=value,label2=survey_house),
	shape=1,alpha=.8)+
	geom_line(size=1) +
	theme_elliott() +
	labs(x="Date",
	y="",
	title = "Trump Job Approval Polls",
	subtitle="Polls corrected for house effects and averaged with\nhigher weight given to most recent polls") +
	geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7,
	y=disapprove,
	label=paste0(round(disapprove,1),"% Disapprove"),col="Disapprove")) +
	geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7,
	y=approve,
	label=paste0(round(approve,1),"% Approve"),col="Approve")) +
	scale_color_manual("",values = c("Approve" = "blue","Disapprove"="red")) +
	coord_cartesian(xlim = c(as.Date("2017-01-20"),max(as.Date(df3$end_date))+15),
	ylim = c(35,65))

	#geom_smooth(data=df2,aes(x=as.Date(end_date),y=Net),col="black",se=FALSE,span=.3)

	grid.arrange(trump_polls_ggplot,my_g,heights=c(9, .5))


	png("MasterPlot.png",unit="in",height=15,width=15,res=400)

	grid.arrange(trump_polls_ggplot,

	grid.arrange(
	pollster_effect,
	avg_compare,
	mode_effect,
	pop_effect,
	ncol = 2),

	my_g,
	heights=c(5,9, .5)
	)
	dev.off()

	house$Pollster = substr(as.character(house$Pollster),3,100)

	left_join(df2 %>%
	select("Pollster" = survey_house,
	"Date" = end_date,
	Approve,
	Disapprove,
	"Approve (Adj.)" = Adjusted,
	"Disapprove (Adj.)"= Adjusted_Dis) %>%
	arrange(desc(Date)),
	house %>% select(Pollster,"House Effect" = Effect),by="Pollster")