Skip to content

Instantly share code, notes, and snippets.

@TonyLadson
Created September 12, 2016 03:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TonyLadson/2f11db733bd93fe47ebd3d276a81f819 to your computer and use it in GitHub Desktop.
Save TonyLadson/2f11db733bd93fe47ebd3d276a81f819 to your computer and use it in GitHub Desktop.
Functions to assist in identifying and investigating outliers in water quality data.
# Tukey's k for the extremes of a dataset: how many interquartile ranges the
# maximum lies above the upper quartile (k_max) and the minimum lies below
# the lower quartile (k_min). Missing values are ignored.
# k > 3 suggests data are 'far out'
# Returns a one-row data.frame with columns k_max and k_min.
Tukey_k <- function(x) {
  # Default quantile() probabilities are 0, 0.25, 0.5, 0.75, 1, so
  # positions 2 and 4 hold the lower and upper quartiles.
  quartiles <- quantile(x, na.rm = TRUE)
  lower <- quartiles[2]
  upper <- quartiles[4]
  spread <- upper - lower

  # as.vector() drops the "25%"/"75%" names inherited from quantile().
  data.frame(
    k_max = as.vector((max(x, na.rm = TRUE) - upper) / spread),
    k_min = as.vector((lower - min(x, na.rm = TRUE)) / spread)
  )
}
# Plot a histogram of the log of the data (with nice labels)
#
# The histogram is drawn from log10(x); the x-axis is then relabelled at
# whole powers of ten so the tick labels read in the original units.
#
# Arguments:
#   x    values to plot; log10() is applied to them as given.
#   ...  further arguments passed on to hist(). `breaks`, `xlab`, `main`
#        and `col` may be supplied here to override the defaults
#        ("sturges", the deparsed expression for x, "", "blue").
#        Previously, supplying any of them failed with "formal argument
#        matched by multiple actual arguments".
# Returns the value of the final axis() call.
hist_log <- function(x, ...) {
  xname <- deparse(substitute(x))
  log_x <- log10(x)

  # Arguments placed after `...` in a signature are only matched by exact
  # name, so caller-supplied values replace these defaults and everything
  # else flows through to hist() untouched.
  draw <- function(..., breaks = "sturges", xlab = xname, main = "",
                   col = "blue") {
    # xaxt = "n" suppresses the default axis; a custom one is added below.
    hist(log_x, breaks = breaks, xaxt = "n", xlab = xlab, main = main, ...,
         col = col)
  }
  draw(...)

  # Label every whole power of ten spanning the default tick locations.
  tick_range <- range(axTicks(1))
  decades <- floor(tick_range[1]):ceiling(tick_range[2])
  axis(side = 1, at = decades, labels = 10^decades)
}
# Normal Q-Q plot of log10(x), with the y-axis labelled at whole powers of
# ten in the original units and a dashed blue reference line from qqline().
# The plot title is the expression the caller passed as x.
qq_log <- function(x) {
  plot_title <- deparse(substitute(x))

  # Suppress the default y-axis; a custom one is drawn below.
  qqnorm(log10(x), yaxt = "n", main = plot_title)

  # Whole powers of ten spanning the default tick locations.
  tick_range <- range(axTicks(2))
  decades <- floor(tick_range[1]):ceiling(tick_range[2])
  axis(side = 2, at = decades, labels = 10^decades, las = 2)

  qqline(log10(x), lwd = 2, lty = 2, col = "blue")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment