Created
May 12, 2017 20:39
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(microbenchmark) | |
library(ggplot2) | |
HugovdBerg <- function(x, method = "one", na.rm = FALSE) { | |
x <- unlist(x) | |
if (na.rm) { | |
x <- x[!is.na(x)] | |
} | |
# Get unique values | |
ux <- unique(x) | |
n <- length(ux) | |
# Get frequencies of all unique values | |
frequencies <- tabulate(match(x, ux)) | |
modes <- frequencies == max(frequencies) | |
# Determine number of modes | |
nmodes <- sum(modes) | |
nmodes <- ifelse(nmodes==n, 0L, nmodes) | |
if (method %in% c("one", "mode", "") | is.na(method)) { | |
# Return NA if not exactly one mode, else return the mode | |
if (nmodes != 1) { | |
return(NA) | |
} else { | |
return(ux[which(modes)]) | |
} | |
} else if (method %in% c("n", "nmodes")) { | |
# Return the number of modes | |
return(nmodes) | |
} else if (method %in% c("all", "modes")) { | |
# Return NA if no modes exist, else return all modes | |
if (nmodes > 0) { | |
return(ux[which(modes)]) | |
} else { | |
return(NA) | |
} | |
} | |
warning("Warning: method not recognised. Valid methods are 'one'/'mode' [default], 'n'/'nmodes' and 'all'/'modes'") | |
} | |
Chris <- function (x, method = "mode", na.rm = FALSE) | |
{ | |
x <- unlist(x) | |
if (na.rm) | |
x <- x[!is.na(x)] | |
u <- unique(x) | |
n <- length(u) | |
#get frequencies of each of the unique values in the vector | |
frequencies <- rep(0, n) | |
for (i in seq_len(n)) { | |
if (is.na(u[i])) { | |
frequencies[i] <- sum(is.na(x)) | |
} | |
else { | |
frequencies[i] <- sum(x == u[i], na.rm = TRUE) | |
} | |
} | |
#mode if a unimodal vector, else NA | |
if (method == "mode" | is.na(method) | method == "") | |
{return(ifelse(length(frequencies[frequencies==max(frequencies)])>1,NA,u[which.max(frequencies)]))} | |
#number of modes | |
if(method == "nmode" | method == "nmodes") | |
{return(length(frequencies[frequencies==max(frequencies)]))} | |
#list of all modes | |
if (method == "modes" | method == "modevalues") | |
{return(u[which(frequencies==max(frequencies), arr.ind = FALSE, useNames = FALSE)])} | |
#error trap the method | |
warning("Warning: method not recognised. Valid methods are 'mode' [default], 'nmodes' and 'modes'") | |
return() | |
} | |
KenWilliams <- function(x) { | |
ux <- unique(x) | |
ux[which.max(tabulate(match(x, ux)))] | |
} | |
Tyler <- function(dataframe){ | |
DF <- as.data.frame(dataframe) | |
MODE2 <- function(x){ | |
if (is.numeric(x) == FALSE){ | |
df <- as.data.frame(table(x)) | |
df <- df[order(df$Freq), ] | |
m <- max(df$Freq) | |
MODE1 <- as.vector(as.character(subset(df, Freq == m)[, 1])) | |
if (sum(df$Freq)/length(df$Freq)==1){ | |
warning("No Mode: Frequency of all values is 1", call. = FALSE) | |
}else{ | |
return(MODE1) | |
} | |
}else{ | |
df <- as.data.frame(table(x)) | |
df <- df[order(df$Freq), ] | |
m <- max(df$Freq) | |
MODE1 <- as.vector(as.numeric(as.character(subset(df, Freq == m)[, 1]))) | |
if (sum(df$Freq)/length(df$Freq)==1){ | |
warning("No Mode: Frequency of all values is 1", call. = FALSE) | |
}else{ | |
return(MODE1) | |
} | |
} | |
} | |
return(as.vector(lapply(DF, MODE2))) | |
} | |
BitScavenger <- function(x) { | |
x <- sort(x) | |
u <- unique(x) | |
y <- lapply(u, function(y) length(x[x==y])) | |
u[which( unlist(y) == max(unlist(y)) )] | |
} | |
set.seed(20160629) | |
L <- sample(c(TRUE, FALSE, NA), 10^4, replace = TRUE) | |
N <- sample(c(-200:200, NaN, Inf, NA), 10^4, replace = TRUE) | |
C <- sample(c(LETTERS, letters, NA), 10^4, replace = TRUE) | |
CF <- factor(sample(c(LETTERS, letters, NA), 10^4, replace = TRUE)) | |
mb <- microbenchmark(HugovdBerg(L), HugovdBerg(N), HugovdBerg(C), HugovdBerg(CF), | |
KenWilliams(L), KenWilliams(N), KenWilliams(C), KenWilliams(CF), | |
Chris(L), Chris(N), Chris(C), Chris(CF), | |
BitScavenger(L), BitScavenger(N), BitScavenger(C), BitScavenger(CF), | |
Tyler(L), Tyler(N), Tyler(C), Tyler(CF), | |
times = 200) | |
plt <- ggplot(mb, aes(x=expr, y=log(time))) + | |
geom_boxplot() + | |
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.3)) | |
ggsave('c:/temp/compareModes.png', plot = plt, width = 32, height = 24, units = 'cm') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment