Created
March 2, 2018 15:39
-
-
Save elliottmorris/6e61b6fcf5ddd82767ca3f11213745fe to your computer and use it in GitHub Desktop.
Fit Correlation Matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function written by HuffPost Pollster's Natalie Jackson for the 2016 election
# libraries and import --------
library(dplyr)
library(ggplot2)
library('Matrix')
# Read the per-seat partisanship/demographics table and keep only the four
# columns named below.
# NOTE(review): the original comment was cut off at "read in a 435";
# presumably the file has one row per U.S. House seat (435) -- confirm.
seat_data <- read.csv("seat_partisanship_demog.csv") %>%
  select(clinton2016, obama2012, house2016, pct_nonwhite)
# make a correlations matrix for simulation --------
# NOTE(review): `merged` and `gseats` are not defined anywhere in this file.
# They are presumably objects left over from the script this was adapted from
# (possibly `seat_data` and a table of seat codes) -- confirm before running.
seat_codes <- gsub("-", "", gseats$Code)

# Transpose so that each seat becomes a column; cor() then returns a
# seat-by-seat correlation matrix.
seat_cor <- as.data.frame(t(merged))
names(seat_cor) <- seat_codes
corr <- cor(seat_cor, use = "complete.obs")
row.names(corr) <- seat_codes
head(corr)

# Row names are not written; the column names (seat codes) are kept as the
# CSV header.
write.csv(corr, "Tracker/Data/corr_matrix.csv", row.names = FALSE)
# this function reads the correlation matrix and corrects values to work
# properly when generating draws --------

#' Build the matrix that turns independent normal draws into correlated ones.
#'
#' Reads a correlation matrix from a CSV file, replaces it with the nearest
#' positive-definite correlation matrix, and returns that matrix's symmetric
#' square root, computed from its eigendecomposition.
#'
#' @param path Path to the CSV file holding the correlation matrix (header row
#'   of column names, no row-name column). Defaults to the file written
#'   earlier in this script.
#' @return A square numeric matrix `K` such that `K %*% K` equals the
#'   corrected correlation matrix.
buildCorrelationConstant <- function(path = "Tracker/Data/corr_matrix.csv") {
  # Read correlations from file
  frame <- read.csv(path)
  rawMatrix <- as.matrix(frame)

  # Make the matrix the nearest positive definite, but still correlated,
  # because CorrelatedRnorm() won't work with the negative eigenvalues the
  # original generates.
  pdMatrix <- as.matrix(nearPD(rawMatrix, corr = TRUE)$mat)
  ev <- eigen(pdMatrix, symmetric = TRUE)

  # Pass nrow explicitly: diag() given a single number builds an identity
  # matrix of that size instead of a 1 x 1 matrix holding the value.
  rootValues <- diag(sqrt(ev$values), nrow = length(ev$values))
  ev$vectors %*% rootValues %*% t(ev$vectors)
}
# Build the matrix once at load time; CorrelatedRnorm() uses it for each draw.
kCorrelationConstant <- buildCorrelationConstant()
# here, we actually draw some random numbers ----------

#' Draw one vector of correlated normal errors.
#'
#' Generates independent standard-normal values and multiplies them by the
#' correlation constant, so the result has one value per row of that matrix.
#'
#' @param correlation_constant Numeric matrix the independent draws are
#'   multiplied by. Defaults to the global `kCorrelationConstant`.
#' @return A plain numeric vector with `nrow(correlation_constant)` elements.
CorrelatedRnorm <- function(correlation_constant = kCorrelationConstant) {
  # One independent draw per column so the product below always conforms
  # (the default constant is square, so this equals the row count).
  random <- rnorm(ncol(correlation_constant))

  # drop() turns the n x 1 matrix product into a plain vector.
  drop(correlation_constant %*% random)
}
# and here's a plot to compare clinton share and the generated error ------
# One fresh draw from CorrelatedRnorm() is paired row-by-row with the
# clinton2016 column; data.frame() will fail if the two lengths cannot be
# recycled to match.
data.frame("corr" = CorrelatedRnorm(), "clinton" = seat_data$clinton2016) %>%
  ggplot(aes(clinton, corr)) +
  geom_point()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment