Skip to content

Instantly share code, notes, and snippets.

@chrishanretty
Created February 10, 2013 18:21
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save chrishanretty/4750469 to your computer and use it in GitHub Desktop.
Pooling the polls for Italy, final version
### Takes as input a Google Docs spreadsheet
### Outputs national and regional level estimates of support
### over time
### Load libraries
library(R2WinBUGS)
library(rjags)
library(RCurl)
library(reshape)
library(foreach)
library(car)
library(RColorBrewer)
library(ggplot2)
library(scales)
library(doMC)
registerDoMC(cores=4)
### Set up configs.
start.date <- as.Date("2012-12-10")
n.iter <- 2e6
n.burn <- n.iter / 4
n.thin <- n.iter/1000
the.date <- Sys.Date()
### Get data
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?key=0AlLkeTCb2GrxdFFVeEY1bS1nU3N4MnF2NnduQVNtOUE&single=true&gid=0&output=csv")
dat <- read.csv(textConnection(gdoc),na.strings=c("","NA"))
### Clean up numeric information
pct.vars <- 9:23
dat[,pct.vars] <- apply(dat[,pct.vars],2,function(x){
as.numeric(gsub("%","",gsub(",",".",x)))/100
})
### Clean up date information
date.vars <- 2:5
dat[,date.vars] <- apply(dat[,date.vars],2,function(x){
julian(as.Date(as.character(x),format="%d/%m/%Y"),start.date)
})
### Subset to dates after start date
dat$avg.date <- floor(rowMeans(dat[,c("DateFrom","DateTo")])) + 1
dat <- subset(dat,avg.date > 0)
### Clean up nRespondents
dat$nRespondents[is.na(dat$nRespondents)] <- 200
### Clean up company information
dat$Company <- recode(dat$Company,
"c('Datamonitor')='DATAMONITOR';
c('Demos-Demetra','Demos&Pi')='Demos';
c('Ipsos','Ipsos pa','Ipsos srl')='Ipsos srl';
c('Istituto Piepoli','Istituto Piepoli Spa')='Istituto Piepoli';
c('tecne\\'','tecnè','Tecnè')='Tecnè'")
### Convert to factor
### Explicit listing of companies is brittle
### But necessary for the Bugs scripts
company.levels <- c("SpinCon",
"DATAMONITOR",
"SWG spa",
"DEMOPOLIS - Istituto Nazionale di Ricerche",
"ISPO RICERCHE S.R.L.",
"Lorien Consulting",
"Tecnè",
"Quorum",
"EMG Srl",
"Demos",
"Euromedia Research",
"IPR Marketing",
"Istituto Piepoli",
#"Epoké. Ricerche Sociali Applicate",
"Ipsos srl")
dat$Company <- factor(dat$Company,
levels = company.levels,
ordered=TRUE)
dat <- dat[!is.na(dat$Company),]
### Save the clean version
save(dat,file= paste(the.date,"_setup.RData",sep=""))
#################################################################
## JAGS MODEL!
#################################################################
model1 <- function() {
## measurement model
for(i in 1:nPolls){
mu[i] <- house[org[i]] + alpha[date[i]] # + senfx * isSenate[i]
y[i] ~ dnorm(mu[i],prec[i]) # %_%T(0,1)
}
## transition model (aka random walk prior)
for(i in 2:nPeriods){
mu.alpha[i] <- alpha[i-1]
alpha[i] ~ dnorm(mu.alpha[i],tau) # %_%T(0,1)
}
## priors
tau <- 1/pow(sigma,2) ## deterministic transform to precision
sigma ~ dunif(0,.01) ## uniform prior on standard deviation
alpha[1] ~ dunif(0,.5) ## initialization of daily track
#senfx ~ dnorm(0,0.01)
for(i in 1:nOrgs){ ## normal priors for house effects
house.star[i] ~ dnorm(0,1000)
house[i] <- house.star[i] - mean(house.star)
}
}
write.model(model1,con="model1.bug")
### Do loop around parties
the.parties <- names(dat)[pct.vars]
the.parties <- setdiff(the.parties,"Lista.Monti.Senato")
res <- foreach(i = the.parties) %dopar% {
good <- !is.na(dat[,i]) & (dat$Area == "Italy") & (dat$Ramo=="Camera")
if (i == "Altri.CDX") {
good <- !is.na(dat[,i]) &
!is.na(dat[,"Fratelli.d.Italia"]) &
!is.na(dat[,"Destra"]) &
(dat$Area == "Italy")
}
y <- dat[good,i]
var <- (y)*(1-(y))/(dat$nRespondents[good])
prec <- 1/var
date <- dat$avg.date[good]
org=as.integer(dat$Company[good])
isSenate = (dat$Ramo == "Senato")
nOrgs = length(levels(dat$Company))
nPolls=length(y[good])
nPeriods=length(min(dat$avg.date):max(dat$avg.date))
alpha=rep(NA,nPeriods)
forJags <- list(y=y,prec=prec,date=date,org=org,nOrgs=nOrgs,nPolls=nPolls,
isSenate = isSenate,
nPeriods = nPeriods,alpha=alpha)
initfunc <- function(party){
house.star <- rnorm(n=forJags$nOrgs,sd=.075)
nPeriods = length(min(dat$avg.date):max(dat$avg.date))
sigma <- runif(n=1,0,.01)
alpha <- rep(NA,nPeriods)
if (i == "PD") {
alpha[1] <- .28
} else if (i == "PDL") {
alpha[1] <- .17
} else if (i == "M5S") {
alpha[1] <- .15
} else if (i == "Lega") {
alpha[1] <- .05
} else if (i == "Destra") {
alpha[1] <- .015
} else if (i == "Fratelli.d.Italia") {
alpha[1] <- .015
} else if (i == "Altri.CDX") {
alpha[1] <- .015
} else if (i == "UDC") {
alpha[1] <- .035
} else if (i == "FLI") {
alpha[1] <- .015
} else if (i == "Lista.Monti.Camera") {
alpha[1] <- .09
} else if (i == "SEL") {
alpha[1] <- .04
} else if (i == "Altri.CSX") {
alpha[1] <- .015
} else if (i == "Rivoluzione.Civile") {
alpha[1] <- .04
} else if (i == "FermareDeclino") {
alpha[1] <- .015
}
list(house.star=house.star,alpha=alpha,
sigma=sigma)
}
my.model <- jags.model("model1.bug",
data=forJags,inits=initfunc(i),n.chains=1,
n.adapt=100)
out <- coda.samples(my.model,n.burnin=n.burn,
variable.names=c("alpha","house","sigma"),#,"senfx"),
n.iter=n.iter,thin=n.thin)
filename = paste("jags_output/jags_output_",i,"_",the.date,".RData",sep="")
save(out,forJags,file=filename)
}
#################################################################
## END OF NATIONAL ANALYSIS
#################################################################
model2 <- function() {
for(j in 1:nOrgs){ ## vague normal priors for house effects
house[j] ~ dnorm(true.house[j],true.prec.house[j])
house.cut[j] <- cut(house[j])
}
for(j in 1:nDailies) {
alpha[j] ~ dnorm(true.alpha[j],true.prec.alpha[j])
alpha.cut[j] <- cut(alpha[j])
}
for (i in 1:nregPolls) {
mu[i] <- house.cut[org[i]] + (alpha.cut[date[i]] * scale.factor[region[i]])
y[i] ~ dnorm(mu[i],prec[i])
}
for (i in 1:nRegions) {
scale.factor[i] ~ dnorm(mm[i],pm[i])
}
}
write.model(model2,con="model2.bug")
the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
"SICILIA","TOSCANA","UMBRIA","VENETO")
### Get regional priors, precisions
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdDY3MDlDRzQ0WDVhT2xPRVpXYzAzclE&single=true&gid=0&output=csv")
mean.mat <- read.csv(textConnection(gdoc),header=T,check.names=T)
gdoc <- getURL("https://docs.google.com/spreadsheet/pub?hl=en&hl=en&key=0AlLkeTCb2GrxdF9lb2daMURVTlNsOWNFbUxsWktXOUE&single=true&gid=0&output=csv")
prec.mat <- read.csv(textConnection(gdoc),header=T,check.names=T)
### List files
### Exclude UDC, FLI
the.files <- list.files(path="jags_output/",
pattern = paste("jags_output.*",the.date,".RData",sep=""))
## Exclude regional files
the.files <- the.files[grep("region",the.files,invert=TRUE)]
## Exclude UDC, FLI
the.files <- the.files[grep("(UDC|FLI)",the.files,invert=TRUE)]
for (my.file in the.files) {
the.party <- gsub("jags_output_","",my.file)
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
if (the.party == "Lista.Monti.Camera") {
the.party <- "Lista.Monti.Senato"
}
## Load the file
load(paste("jags_output/",my.file,sep=""))
holder <- summary(out)
## Identify parts of the file
dailies <- grep("alpha",rownames(holder$statistics))
house.fx <- grep("house",rownames(holder$statistics))
## ##############################################################
## Now get regional stuff
## ##############################################################
reg <- subset(dat,Area!="Italy")
reg$Region <- factor(toupper(reg$Area),
levels = the.regions,
ordered=TRUE)
reg <- reg[!is.na(reg$Region),]
reggood <- (!is.na(reg[,the.party]))
y <- reg[reggood,the.party]
var <- (y)*(1-(y))/(reg$nRespondents[reggood])
prec <- 1/ var
prec [!is.finite(prec)] <- max(prec[is.finite(prec)])
forJags <- list(y=y,
true.house = holder$statistics[house.fx,1],
true.prec.house = 1 / (holder$statistics[house.fx,2] ^ 2),
true.alpha = holder$statistics[dailies,1],
true.prec.alpha = 1 / (holder$statistics[dailies,2] ^ 2),
org=as.integer(reg$Company[reggood]),
nOrgs = length(company.levels),
nDailies = length(dailies),
region = as.integer(reg$Region[reggood]),
nRegions = length(the.regions),
date = reg$avg.date[reggood],
mm = mean.mat[,the.party],
pm = prec.mat[,the.party],
prec=prec,
nregPolls=length(y[reggood]))
#initfunc <- function(){
# house.star <- rnorm(n=forJags$nOrgs,sd=.075)
# nPeriods = length(min(dat$Julian):max(dat$Julian))
# sigma <- runif(n=1,0,.01)
# list(house.star=house.star,
# sigma=sigma)
#}
initfunc <- function(){
scale.factor <- rnorm(length(forJags$mm),
mean = forJags$mm,
sd = (1/forJags$pm))
list(scale.factor=scale.factor)
}
my.bugs.directory<-"/home/chris/.wine/drive_c/Program Files/WinBUGS14"
bugs.obj <- bugs(data=forJags, inits=initfunc,
model.file="model2.bug",
parameters.to.save=c("scale.factor"),
n.chains=3,
n.iter=1e5+1e4,
n.burnin=1e4,
n.thin=1e5/1e3,
codaPkg = TRUE,
bugs.directory=my.bugs.directory,
working.directory=".",
debug=F)
out <- read.bugs(bugs.obj)
filename = paste("jags_output/jags_output_",the.party,"_regions_",the.date,".RData",sep="")
save.image(file=filename)
}
#stop("End of evaluation: continue manually from here")
### Now get house effects
the.files <- list.files(path="jags_output/",pattern="jags_output_")
the.files <- the.files[grepl(the.date,the.files)]
## Exclude regional files
the.files <- the.files[grep("region",the.files,invert=TRUE)]
master <- vector("list",length(the.files))
setwd("jags_output")
for (i in 1:length(the.files)) {
the.party <- gsub("jags_output_","",the.files[i])
the.party <- gsub(".RData","",the.party)
the.party <- gsub(paste("_",the.date,sep=""),"",the.party)
load(the.files[i])
holder <- summary(out)
my.house <- grep("house",rownames(holder$statistics))
master[[i]] <- data.frame(Party=the.party,
Mean = holder$statistics[my.house,1],
Lo = holder$quantiles[my.house,1],
Lwr = holder$quantiles[my.house,2],
Upr = holder$quantiles[my.house,4],
Hi = holder$quantiles[my.house,5])
}
setwd("..")
master <- do.call("rbind",master)
### Now plot trends over time
###
the.files <- list.files(path="jags_output/",pattern="jags_output_")
the.files <- the.files[grepl(the.date,the.files)]
## Exclude regional files
the.files <- the.files[grep("region",the.files,invert=TRUE)]
master <- vector("list",length(the.files))
setwd("jags_output")
for (i in 1:length(the.files)) {
the.party <- gsub("jags_output_","",the.files[i])
the.party <- gsub(".RData","",the.party)
the.party <- gsub(paste("_",the.date,sep=""),"",the.party)
load(the.files[i])
holder <- summary(out)
my.alpha <- grep("alpha",rownames(holder$statistics))
master[[i]] <- data.frame(Party=the.party,
Mean = holder$statistics[my.alpha,1],
Lo = holder$quantiles[my.alpha,1],
Lwr = holder$quantiles[my.alpha,2],
Upr = holder$quantiles[my.alpha,4],
Hi = holder$quantiles[my.alpha,5])
}
setwd("..")
master <- do.call("rbind",master)
master$julian <- rep(1:length(my.alpha),times = length(the.files))
master$Date <- as.Date(master$julian,origin=start.date)
master$Party <- factor(master$Party,
levels=c("PD","PDL",
"M5S","Lista.Monti.Camera",
"Lega","SEL","Rivoluzione.Civile",
"UDC","FLI",
"Destra","Fratelli.d.Italia",
"Altri.CDX","Altri.CSX","FermareDeclino"))
my.pal <- brewer.pal(11,"Paired")
party.colors <- c(my.pal[6],my.pal[2],
my.pal[10],my.pal[1],
my.pal[4],my.pal[5],my.pal[8], ## riv civ.
my.pal[4],my.pal[7],
"black","gray",
my.pal[6],my.pal[2],my.pal[9])
## Break in to two pieces
top.parties <- levels(master$Party)[1:3]
middle.parties <- levels(master$Party)[4:7]
bottom.parties <- levels(master$Party)[8:length(levels(master$Party))]
## Set up limits
left.lim<-min(master$Date)
right.lim<-max(master$Date)+10
## Plot top
top.df <- master[master$Party %in% top.parties,]
maxdate <- max(top.df$Date)
top.plot <- ggplot(top.df,aes(x=Date,y=Mean,group=Party)) +
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +
geom_line(color="white") +
scale_x_date("Date",limits=c(left.lim,right.lim)) +
geom_text(data = subset(top.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) +
scale_y_continuous("%",labels=percent) +
scale_fill_manual(values = party.colors,drop=T) +
scale_color_manual(values = party.colors,drop=T) +
theme_bw() +
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))
middle.df <- master[master$Party %in% middle.parties,]
maxdate <- max(middle.df$Date)
middle.plot <- ggplot(middle.df,aes(x=Date,y=Mean,group=Party)) +
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +
geom_line(color="white") +
scale_x_date("Date",limits=c(left.lim,right.lim)) +
geom_text(data = subset(middle.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) +
scale_y_continuous("%",labels=percent) +
scale_fill_manual(values = party.colors,drop=T) +
scale_color_manual(values = party.colors,drop=T) +
theme_bw() +
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))
bottom.df <- master[master$Party %in% bottom.parties,]
## bottom.df <- master[master$Party == "Destra",]
maxdate <- max(bottom.df$Date)
bottom.plot <- ggplot(bottom.df,aes(x=Date,y=Mean,group=Party)) +
geom_smooth(aes(ymin=Lwr,ymax=Upr,color=Party,fill=Party),alpha=.5,stat="identity") +
geom_smooth(aes(ymin=Lo,ymax=Hi,color=Party,fill=Party),alpha=.3,stat="identity") +
geom_line(color="white") +
scale_x_date("Date",limits=c(left.lim,right.lim)) +
geom_text(data = subset(bottom.df,Date==max(Date)),aes(x=Date,y=Mean,color=Party,label=signif(Mean*100,2)),hjust=0,size=6) +
scale_y_continuous("%",labels=percent) +
scale_fill_manual(values = party.colors,drop=T) +
scale_color_manual(values = party.colors,drop=T) +
theme_bw() +
opts(legend.position="bottom",legend.direction="horizontal",title=paste("Polls up to ",maxdate,sep=""))
### Now plot regional scale factors
###
the.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""),
path="jags_output/")
the.regions <- c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
"SICILIA","TOSCANA","UMBRIA","VENETO")
for (my.file in the.files) {
the.party <- gsub("jags_output_","",my.file)
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
the.party <- gsub("_regions","",the.party)
## Load the file
load(paste("jags_output/",my.file,sep=""))
holder <- summary(out)
## Plot the regional stuff
regional.df <- data.frame(Region = the.regions,
Mean = holder$statistics[-1,1],
Lo = holder$quantiles[-1,1],
Hi = holder$quantiles[-1,5])
regional.df$Region <- factor(regional.df$Region,ordered=T)
regional.df$Region <- reorder(regional.df$Region,regional.df$Mean)
regional.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) +
geom_pointrange() +
scale_x_discrete("") +
scale_y_continuous(name = paste(the.party, "regional scale factor")) +
coord_flip() + theme_bw()
## How does this compare to our priors?
mm <- mean.mat [ , the.party]
pm <- prec.mat [ , the.party]
prior.df <- data.frame(Region = the.regions,
Mean = mm,
Lo = mm - 1.96 * sqrt(1/pm),
Hi = mm + 1.96 * sqrt(1/pm))
prior.df$Region <- factor(prior.df$Region,ordered=T)
prior.df$Region <- reorder(prior.df$Region,prior.df$Mean)
prior.plot <- ggplot(prior.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region)) +
geom_pointrange() +
scale_x_discrete("") +
scale_y_continuous(name = paste(the.party, "regional scale factor")) +
coord_flip() + theme_bw()
regional.df$Variable <- "Estimated"
prior.df$Variable <- "Prior"
regional.df <- rbind(regional.df,prior.df)
comb.plot <- ggplot(regional.df,aes(ymin=Lo,ymax=Hi,y=Mean,x=Region,color=Variable)) +
geom_pointrange(position=position_dodge(width=.5, height=0)) +
scale_x_discrete("") +
scale_y_continuous(name = paste(the.party, "regional scale factor")) +
coord_flip() + theme_bw()
pdf(file = paste("images/",the.party,"_regional_scale.pdf",sep=""))
print(comb.plot)
dev.off()
}
### Votes to seat allocations
###
## First, the Camera
largest.remainders <- function(x,y) {
## x is vote shares
## y is number seats
if (length(y)>1) {
stop("Number of seats should have length 1")
}
x2 <- x/sum(x,na.rm=T)
quota <- 1 / y
seats <- floor(x2 / quota)
seats.left <- y - sum(seats)
if (seats.left > 0) {
remainder <- x2 %% quota
additional.seats <- order(remainder,decreasing=T)[1:seats.left]
seats[additional.seats] <- seats[additional.seats] + 1
}
seats
}
doCameraAlloc <- function(dat){
## Create coalitions
dat$Coalition <- car:::recode(as.character(dat$Party),
"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX';
c('UDC','FLI','Lista.Monti.Camera') = 'Centre';
c('PD','SEL','Altri.CSX') = 'CSX'")
## Check coalition/list thresholds
dat$InCoalition <- (dat$Party != dat$Coalition)
dat <- ddply(dat,.(Coalition),transform,CoalitionTotal = sum(Value,na.rm=T))
dat <- ddply(dat,.(Coalition),transform,Coalition2Pct = any(Value>2))
dat$makesThreshold.a <- ((dat$InCoalition == 1) & (dat$CoalitionTotal > 10) & (dat$Coalition2Pct == TRUE))
dat$makesThreshold.b <- ((dat$InCoalition == 0) & (dat$CoalitionTotal > 4))
dat$makesThreshold <- (dat$makesThreshold.a | dat$makesThreshold.b)
## Remove things which do not make the cut at this stage
dat <- subset(dat,makesThreshold)
## Check maj. premium, initial allocation
coalition.df <- unique(dat[,c("CoalitionTotal","Coalition")])
coalition.df$CoalitionAlloc <- largest.remainders(coalition.df$CoalitionTotal,629) ## 630 less Val d'Aosta
dat <- merge(dat,coalition.df,all=T)
isOkay <- (max(dat$CoalitionAlloc)>340)
if (isOkay) {
} else {
dat$CoalitionAlloc[which(dat$CoalitionTotal == max(dat$CoalitionTotal))] <- 340
}
## Check list within coalition thresholds
dat$makesThreshold.c <- (dat$InCoalition ==0) | (dat$Value > 2)
dat <- ddply(dat,.(Coalition),function(inner.df) {
inner.df$lowValues <-inner.df$Value
inner.df$lowValues [which(inner.df$Value > 2)] <- NA
inner.df$makesThreshold.c[which(inner.df$lowValues == max(inner.df$lowValues,na.rm=T))] <- TRUE
inner.df
})
## But impose a ban on Altri.CDX getting anything
dat$makesThreshold.c[dat$Party=="Altri.CDX"] <- FALSE
## Remove things which do not make the cut at this stage
dat <- subset(dat,makesThreshold.c)
## Allocate seats
dat <- ddply(dat,.(Coalition),function(inner.df){
inner.df$PartyAlloc <- largest.remainders(inner.df$Value,
unique(inner.df$CoalitionAlloc))
inner.df
})
dat
}
national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""),
path="jags_output/")
national.files <- national.files[grep("region",national.files,invert=TRUE)]
## Remove Monti in the Senate
national.files <- national.files[grep("Senato",national.files,invert=TRUE)]
nParties <- length(national.files)
nSimulations <- 1000
out.mat <- array(data=NA,
dim = c(nSimulations,nParties))
dimnames(out.mat)[[1]] <- as.character(1:1000)
dimnames(out.mat)[[2]] <- letters[1:nParties]
for (national.file in national.files) {
the.party <- gsub("jags_output_","",national.file)
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
load(paste("jags_output/",national.file,sep=""))
national.out <- out
## Get the vote share for this party on the last available date
dailies <- grep("alpha",colnames(national.out[[1]]))
lastday <- dailies[length(dailies)]
national.vote <- as.matrix(national.out[[1]][,lastday])
national.vote [ which(national.vote < 0 )] <- 0
out.mat[,match(national.file,national.files)] <- national.vote
dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party
}
plot.df <- melt(out.mat)
names(plot.df) <- c("Simulation","Party","Value")
plot.df$Value <- plot.df$Value * 100
camera.seats <- ddply(plot.df,.(Simulation),doCameraAlloc)
write.csv(camera.seats,file=paste("camera_seats_",the.date,".csv",sep=""))
### Now get the regional vote shares
national.files <- list.files(pattern = paste("jags_output.*",the.date,".RData",sep=""),
path="jags_output/")
national.files <- national.files[grep("region",national.files,invert=TRUE)]
## Remove UDC, FLI
national.files <- national.files[grep("UDC|FLI",national.files,invert=TRUE)]
regional.files <- list.files(pattern = paste("jags_output.*regions_",the.date,".RData",sep=""),
path="jags_output/")
nRegions <- length(the.regions)
nParties <- length(national.files)
nSimulations <- 1000
out.mat <- array(data=NA,
dim = c(nRegions, nParties, nSimulations))
dimnames(out.mat)[[1]] <- the.regions
dimnames(out.mat)[[2]] <- letters[1:nParties]
dimnames(out.mat)[[3]] <- as.character(1:1000)
for (national.file in national.files) {
the.party <- gsub("jags_output_","",national.file)
the.party <- gsub(paste("_",the.date,".RData",sep=""),"",the.party)
load(paste("jags_output/",national.file,sep=""))
national.out <- out
if (the.party == "Lista.Monti.Camera") {
the.party <- "Lista.Monti.Senato"
}
regional.file <- regional.files[grep(paste(the.party,"_",sep=""),regional.files)]
load(paste("jags_output/",regional.file,sep=""))
regional.out <- out
## Get the vote share for this party on the last available date
dailies <- grep("alpha",colnames(national.out[[1]]))
lastday <- dailies[length(dailies)]
## Get the regional scale factors
regional.fx <- grep("scale.factor",colnames(regional.out[[1]]))
## If you want to know their performance on the last day
## summary(national.out[[1]][,lastday])
## If you want to know their regional scale factors
## summary(regional.out[[1]][,regional.fx])
rsf <- as.matrix(regional.out[[1]][,regional.fx])
rsf [which(rsf < 0) ] <- 0
national.vote <- as.matrix(national.out[[1]][,lastday])
national.vote [ which(national.vote < 0 )] <- 0
national.vote <- matrix(rep(national.vote,nRegions),nrow=nSimulations,ncol=nRegions)
foo <- national.vote * rsf
foo [ which(foo < 0) ] <- 0
colnames(foo) <- the.regions
out.mat[,match(national.file,national.files),] <- t(foo)
dimnames(out.mat)[[2]][match(national.file,national.files)] <- the.party
}
plot.df <- melt(out.mat[,,])
plot.df$value <- plot.df$value * 100
write.csv(plot.df,file= paste("vote_allocation_",the.date,".csv",sep=""))
pdf(file="images/regional_vote_shares.pdf",width=20,height=20)
ggplot(plot.df,aes(x=X1,y=value)) + geom_boxplot() +
facet_grid(~X2,scale="free") + scale_y_continuous() +
opts(axis.text.y = theme_text(angle=90)) + coord_flip() + theme_bw()
dev.off()
### Now convert to seats
seat.df <- data.frame(Region = c("ABRUZZO","BASILICATA","CALABRIA","CAMPANIA",
"EMILIA ROMAGNA","FRIULI VENEZIA GIULIA","LAZIO","LIGURIA",
"LOMBARDIA","MARCHE","MOLISE","PIEMONTE","PUGLIA","SARDEGNA",
"SICILIA","TOSCANA","UMBRIA","VENETO"),
Seats = c(7,7,10,29,
22,7,28,8,
49,8,2,22,20,8,
25,18,7,24))
seat.df$MajPrem <- floor(.55 * seat.df$Seats) + 1
seat.df$MajPrem[seat.df$Region=="MOLISE"] <- 1
names(plot.df) <- c("Region","Party","Simulation","value")
plot.df$Party <- as.character(plot.df$Party)
plot.df <- subset(plot.df,!is.element(Party,c("UDC","FLI")))
plot.df <- merge(plot.df,seat.df)
plot.df$Coalition <- car:::recode(as.character(plot.df$Party),
"c('Destra','PDL','Lega','Fratelli.d.Italia','Altri.CDX')='CDX';
c('PD','SEL','Altri.CSX') = 'CSX'")
plot.df$InCoalition <- as.numeric(plot.df$Party != plot.df$Coalition)
system.time(seat.alloc.df <- ddply(plot.df,.(Region,Simulation),function(df){
df <- ddply(df,.(Coalition),transform,CoalitionTotal = sum(value))
df <- ddply(df,.(Coalition),transform,Coalition3Pct = any(value>=3))
df$makesThreshold.b1 <- ((df$InCoalition == 1) & (df$CoalitionTotal > 20) & (df$Coalition3Pct == TRUE))
df$makesThreshold.b2a <- ((df$InCoalition == 0) & (df$value >= 8))
df$makesThreshold.b2b <- ((df$InCoalition == 1) & (df$value >= 8) & (df$makesThreshold.b1 == TRUE))
df$makesThreshold <- ((df$makesThreshold.b1) | (df$makesThreshold.b2a) | (df$makesThreshold.b2b))
df <- subset(df,makesThreshold == TRUE) ## DESTROY BELOW THRESHOLD PARTIES
df$RelevantEntity <- ifelse(df$makesThreshold.b1,
as.character(df$Coalition),
as.character(df$Party))
df$RelevantValue <- ifelse(df$makesThreshold.b1,df$CoalitionTotal,df$value)
relevant.df <- unique(df[,c("RelevantEntity","RelevantValue","Seats","MajPrem")])
## Create initial allocation
relevant.df$initialAlloc <- NA
relevant.df$initialAlloc <- largest.remainders(
x = relevant.df$RelevantValue,
y = unique(relevant.df$Seats))
## Check whether initial allocation acceptable
relevant.df$FinalAlloc <- NA
initialAllocSucceeds <- max(relevant.df$initialAlloc) >= unique(relevant.df$MajPrem)
if (initialAllocSucceeds) {
relevant.df$FinalAlloc <- relevant.df$initialAlloc
} else {
largest.entity <- which(relevant.df$RelevantValue == max(relevant.df$RelevantValue))
smaller.entities <- which(relevant.df$RelevantValue != max(relevant.df$RelevantValue))
largest.entity <- relevant.df[largest.entity,]
smaller.entities <- relevant.df[smaller.entities,]
largest.entity$FinalAlloc <- unique(largest.entity$MajPrem)
smaller.entities$FinalAlloc <- largest.remainders(
x = smaller.entities$RelevantValue,
y = unique(smaller.entities$Seats - smaller.entities$MajPrem))
relevant.df <- rbind(largest.entity,smaller.entities)
}
## Now allocate seats between within-coalition bodies
df <- merge(df,relevant.df)
df <- ddply(df,.(RelevantEntity),function(df2) {
df2$SecondAlloc <- largest.remainders(df2$value,unique(df2$FinalAlloc))
df2
})
})
)
write.csv(seat.alloc.df,file=paste("seat_allocation_",the.date,".csv",sep=""))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment