Created
December 6, 2011 22:32
-
-
Save thomasjensen/1440348 to your computer and use it in GitHub Desktop.
Outliers in the EP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load ggplot2, which provides the plotting functions used in this script.
library(ggplot2)
# Read the MEP data set from the working directory. The plots and outlier
# computations below use its Questions and Speeches columns.
mepDat <- read.csv("ep6.csv")
# Density of the Questions variable, with a dashed vertical marker at the
# median.
questions_median <- median(mepDat$Questions)
plot <- ggplot(mepDat, aes(x = Questions))
plot <- plot + geom_density()
# annotate() draws the marker exactly once; constants placed inside aes() on
# geom_segment() would redraw the same segment for every row of mepDat.
# The marker height (0.031 on the density axis) is hard-coded.
plot <- plot + annotate(
  "segment",
  x = questions_median, xend = questions_median,
  y = 0, yend = .031,
  linetype = 2
)
plot <- plot + theme_bw()
# print() explicitly so the figure is also drawn when the script is source()d.
print(plot)
# Density of the Speeches variable, with a dashed vertical marker at the
# median.
speeches_median <- median(mepDat$Speeches)
plot <- ggplot(mepDat, aes(x = Speeches))
plot <- plot + geom_density()
# annotate() draws the marker exactly once; constants placed inside aes() on
# geom_segment() would redraw the same segment for every row of mepDat.
# The marker height (0.031 on the density axis) is hard-coded.
plot <- plot + annotate(
  "segment",
  x = speeches_median, xend = speeches_median,
  y = 0, yend = .031,
  linetype = 2
)
plot <- plot + theme_bw()
# print() explicitly so the figure is also drawn when the script is source()d.
print(plot)
# Plot the two variables against each other, overlaying 2D density contours
# and reference lines at the mean of each variable.
plot <- ggplot(mepDat, aes(x = Questions, y = Speeches))
plot <- plot + geom_point(alpha = .5)
# NOTE(review): `densdf` (a density grid with a `z` column) is not created
# anywhere in this script. Use it when the session already provides one;
# otherwise let ggplot2 estimate the 2D density from mepDat directly.
if (exists("densdf")) {
  plot <- plot + geom_contour(aes(z = z), data = densdf)
} else {
  plot <- plot + geom_density_2d(colour = "black")
}
# The means are constants, so pass them as parameters rather than through
# aes(), which would evaluate them against every row of the data.
plot <- plot + geom_hline(yintercept = mean(mepDat$Speeches))
plot <- plot + geom_vline(xintercept = mean(mepDat$Questions))
plot <- plot + theme_bw()
# print() explicitly so the figure is also drawn when the script is source()d.
print(plot)
# Compute the 5% and 95% quantiles of Questions and Speeches. Only the 95%
# values are kept as outlier thresholds in `upper`:
# upper[1] is the Questions threshold, upper[2] the Speeches threshold.
outliersQ <- quantile(mepDat$Questions, probs = c(0.05, 0.95))
outliersS <- quantile(mepDat$Speeches, probs = c(0.05, 0.95))
# Select by the name quantile() assigns instead of by position.
upper <- c(outliersQ["95%"], outliersS["95%"])
# For MEPs that lie in the upper-right quadrant (above both 95% thresholds),
# compute the Euclidean distance to the point where the two thresholds
# intersect. Every other MEP gets the baseline size 1.
questions_excess <- mepDat$Questions - upper[[1]]
speeches_excess <- mepDat$Speeches - upper[[2]]
in_upper_right <- mepDat$Questions > upper[[1]] & mepDat$Speeches > upper[[2]]
# Square the differences before summing: sqrt(sum(x - upper)) is the root of
# the summed differences, not a distance. The computation is vectorized over
# all rows, so no index loop or incrementally grown vector is needed.
size <- ifelse(
  in_upper_right,
  sqrt(questions_excess^2 + speeches_excess^2),
  1
)
mepDat$size <- size
# Show the intersection of the upper (95%) quantiles and scale the
# observations by the `size` column, so that MEPs in the upper-right quadrant
# are drawn larger than the rest.
plot <- ggplot(mepDat, aes(x = Questions, y = Speeches))
plot <- plot + geom_point(aes(size = size))
# The thresholds are constants, so pass them as parameters rather than
# through aes(), which is meant for per-row mappings.
plot <- plot + geom_vline(xintercept = upper[[1]])
plot <- plot + geom_hline(yintercept = upper[[2]])
plot <- plot + theme_bw()
# print() explicitly so the figure is also drawn when the script is source()d.
print(plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment