Skip to content

Instantly share code, notes, and snippets.

@DavidArenburg
Last active April 22, 2018 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavidArenburg/fd56948c9ba9f409570c6438e64a326d to your computer and use it in GitHub Desktop.
Save DavidArenburg/fd56948c9ba9f409570c6438e64a326d to your computer and use it in GitHub Desktop.
anomaly2 <- function (x, n = if(is.null(sig)) 10 else NULL, method = "hdr", robust = TRUE, plot = TRUE,
labels = TRUE, col, sig = NULL) {
nc <- nrow(x)
if(!is.null(n)){
if (nc < n) {
stop("Your n is too large.")
}
}
if(!is.null(sig)){
if((sig < 0.001) | (sig >= .5)) {
stop("Your sig is out of range.")
}
}
x[is.infinite(x)] <- NA
naomit.x <- na.omit(x)
na.act <- na.action(naomit.x)
if (is.null(na.act)) {
avl <- 1:nc
}
else {
avl <- (1:nc)[-na.action(naomit.x)]
}
method <- match.arg(method)
if (robust) {
rbt.pca <- pcaPP::PCAproj(naomit.x, k = 2, center = mean,
scale = sd)
}
else {
rbt.pca <- princomp(scale(naomit.x, center = TRUE, scale = TRUE),
cor = TRUE)
}
scores <- rbt.pca$scores
scoreswNA <- matrix(, nrow = nc, ncol = 2)
scoreswNA[avl, ] <- scores[, 1:2]
#tmp.idx <- vector(length = n) # What's that for?
if (method == "hdr") {
if(!is.null(n)){
hdrinfo <- hdrcde::hdr.2d(x = scores[, 1], y = scores[, 2], kde.package = "ks")
tmp.idx <- order(hdrinfo$fxy)[1:n]
} else if (!is.null(sig)){
hdrinfo <- hdrcde::hdr.2d(x = scores[, 1], y = scores[, 2], kde.package = "ks", prob = (1 - sig) * 100)
tmp.ordr <- order(hdrinfo$fxy)
tmp.idx <- tmp.ordr[which(hdrinfo$fxy[tmp.ordr] <= hdrinfo$falpha)]
} else {
stop("Please specify either n or sig")
}
main <- "Lowest densities on anomalies"
}
idx <- avl[tmp.idx]
if (plot) {
if (missing(col)) {
col <- c("grey", "darkblue")
}
else {
lencol <- length(col)
if (lencol == 1L) {
col <- rep(col, 2)
}
else {
col <- unique(col)[1:2]
}
}
xrange <- range(scores[, 1], na.rm = TRUE)
yrange <- range(scores[, 2], na.rm = TRUE)
plot(x = scores[-tmp.idx, 1], y = scores[-tmp.idx, 2],
pch = 19, col = col[1L], xlab = "PC1", ylab = "PC2",
main = main, xlim = xrange, ylim = yrange)
points(scores[tmp.idx, 1], scores[tmp.idx, 2], col = col[2L],
pch = 17)
if (labels) {
text(scores[tmp.idx, 1] + 0.3, scores[tmp.idx, 2],
col = col[2L], label = 1:length(idx), cex = 1.2)
}
}
return(structure(list(index = idx, scores = scoreswNA)))
}

This function biscally adds an option to select time series by the significance of their anomaly specified by the new sig parameter. It is backward compatible, so shouldn't brake any exsisting code, see below

# Load the anomalous package
library(anomalous)

## Create measures
y <- tsmeasures(dat0, window = 30)

###### Backward compatibility
## Compare default behaviour
identical(anomaly(y, method = "hdr"), 
          anomaly2(y, method = "hdr"))

## Make sure that if a user specifies n, it behaves as before
n <- 5
identical(anomaly(y, n = n, method = "hdr"), 
          anomaly2(y, n = n, method = "hdr"))

## Testing new feature
hdr <- anomaly2(y, sig = .15, method = "hdr")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment