Skip to content

Instantly share code, notes, and snippets.

@emhart
Created February 15, 2014 21:57
Show Gist options
  • Save emhart/9025719 to your computer and use it in GitHub Desktop.
Save emhart/9025719 to your computer and use it in GitHub Desktop.
Outlier plot
# Packages used below: ggplot2 (aes, geom_point, facet_wrap), mvtnorm (rmvnorm),
# reshape2 (melt) and plyr (ddply, summarise). The plotting package is named
# "ggplot2"; there is no current "ggplot" package providing these functions.
library(ggplot2)
library(mvtnorm)
library(reshape2)
library(plyr)
# Simulate the example data: 100 draws from a 4-variate normal with small
# variances, followed by 10 draws around the same means with variance 100 on
# every axis. The second batch supplies the likely outliers.
dat <- data.frame(
  rbind(
    rmvnorm(100, mean = c(1, 4, 10, 100), sigma = diag(c(1, 4, 3, 10), 4, 4)),
    rmvnorm(10, mean = c(1, 4, 10, 100), sigma = diag(rep(100, 4), 4, 4))
  )
)
dat$rowID <- seq_len(nrow(dat))
names(dat) <- c("Mean_1", "Mean_4", "Mean_10", "Mean_100", "RowID")

# Long format: one row per (RowID, variable) pair, with the draw in `value`.
mdf <- melt(dat, id.vars = "RowID")
# Single constant label (recycled to every row), used as the x position.
mdf$x <- "Outlier Plot"
### Outlier criterion (from NIST): per-variable lower/upper quartiles, the
### interquartile range, and fences placed 1.5 * IQR beyond each quartile.
iqrs <- ddply(
  mdf, .(variable), summarise,
  lq = quantile(value, .25),
  uq = quantile(value, .75)
)
iqrs$iqr <- with(iqrs, uq - lq)
iqrs$ll <- with(iqrs, lq - iqr * 1.5)  # lower fence
iqrs$ul <- with(iqrs, uq + iqr * 1.5)  # upper fence
### Find outliers: label every observation by comparing it with the fences of
### its own variable. The fences are looked up by variable (via match) instead
### of by row position, so the labels stay correct even when the rows of `mdf`
### are not grouped in the same order as the rows of `iqrs`.
fence_row <- match(mdf$variable, iqrs$variable)
beyond_fence <- mdf$value > iqrs$ul[fence_row] |
  mdf$value < iqrs$ll[fence_row]
mdf$status <- rep("In range", nrow(mdf))
# which() drops NA comparisons, so rows with a missing value stay "In range".
mdf$status[which(beyond_fence)] <- "Outlier"
# Per-variable list of labels, in `iqrs` row order (same shape as before).
out_status <- unname(
  split(mdf$status, factor(fence_row, levels = seq_len(nrow(iqrs))))
)
# One facet per variable, each with its own axis scales; points are coloured
# by outlier status. `scales` is spelled out in full rather than relying on
# partial argument matching of `scale`.
ggplot(mdf, aes(x = x, y = value, colour = status)) +
  geom_point() +
  facet_wrap(~variable, scales = "free")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment