Skip to content

Instantly share code, notes, and snippets.

@thomasjensen
Created December 6, 2011 22:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomasjensen/1440348 to your computer and use it in GitHub Desktop.
Save thomasjensen/1440348 to your computer and use it in GitHub Desktop.
Outliers in the EP
#read the ggplot2 library
library(ggplot2)
#read the data
mepDat <- read.csv("ep6.csv")
#plot the density for the questions variable, with a dashed vertical marker
#at the median
medianQuestions <- median(mepDat$Questions)
plot <- ggplot(mepDat, aes(x = Questions)) +
  geom_density() +
  #annotate() draws the marker exactly once; a geom_segment() whose
  #aesthetics are all constants would redraw it for every row of mepDat
  annotate(
    "segment",
    x = medianQuestions, xend = medianQuestions,
    y = 0, yend = .031,
    linetype = 2
  ) +
  theme_bw()
plot
#plot the density for the speeches variable, with a dashed vertical marker
#at the median
medianSpeeches <- median(mepDat$Speeches)
plot <- ggplot(mepDat, aes(x = Speeches)) +
  geom_density() +
  #annotate() draws the marker exactly once; a geom_segment() whose
  #aesthetics are all constants would redraw it for every row of mepDat
  #NOTE(review): yend = .031 is the same height used for the Questions
  #plot - confirm it suits the scale of the Speeches density
  annotate(
    "segment",
    x = medianSpeeches, xend = medianSpeeches,
    y = 0, yend = .031,
    linetype = 2
  ) +
  theme_bw()
plot
#plot the two variables against each other, overlaying 2d density contours
#and reference lines at the mean of each variable
plot <- ggplot(mepDat, aes(x = Questions, y = Speeches)) +
  geom_point(alpha = .5) +
  #the original layer read a data frame `densdf` that this script never
  #creates; geom_density_2d() estimates the contours from the plotted data
  geom_density_2d() +
  #the means are single values, so pass them as parameters (one line each)
  #instead of mapping them as per-row aesthetics
  geom_hline(yintercept = mean(mepDat$Speeches)) +
  geom_vline(xintercept = mean(mepDat$Questions)) +
  theme_bw()
plot
#get the .05 and .95 quantiles for the questions and speeches variables and
#store the two .95 values in the upper vector (questions first, then speeches)
outliersQ <- quantile(mepDat$Questions, probs = c(0.05, 0.95))
outliersS <- quantile(mepDat$Speeches, probs = c(0.05, 0.95))
upper <- c(outliersQ[2], outliersS[2])
#for MEPs that lie in the upper right quadrant (strictly above both .95
#quantiles) get their Euclidean distance to the intersection of the
#quantiles; every other MEP gets size 1
#[[ ]] drops the "95%" names so they do not leak into the results
excessQ <- mepDat$Questions - upper[[1]]
excessS <- mepDat$Speeches - upper[[2]]
#the differences must be squared before summing, otherwise the value is not
#a distance; computed on whole columns instead of growing a vector in a loop
size <- ifelse(excessQ > 0 & excessS > 0, sqrt(excessQ^2 + excessS^2), 1)
mepDat$size <- size
#show the upper quantile intersection and scale the observations in upper
#right quadrant (point size comes from the size column of mepDat)
plot <- ggplot(mepDat, aes(x = Questions, y = Speeches)) +
  geom_point(aes(size = size)) +
  #upper holds the .95 quantiles (questions first, then speeches); they are
  #single values, so pass them as parameters rather than inside aes(), which
  #would draw one line per row of mepDat
  geom_vline(xintercept = upper[[1]]) +
  geom_hline(yintercept = upper[[2]]) +
  theme_bw()
plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment