Skip to content

Instantly share code, notes, and snippets.

@joshuaulrich
Created August 4, 2017 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshuaulrich/82ae3d6588a4c93e4c23f4657b22dd53 to your computer and use it in GitHub Desktop.
Save joshuaulrich/82ae3d6588a4c93e4c23f4657b22dd53 to your computer and use it in GitHub Desktop.
Detect outliers in 'real-time' using Richard Olsen's method in "High Frequency Finance"
/*
* Copyright (C) 2017 Joshua M. Ulrich
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <R.h>
#include <Rinternals.h>
/* Detect outliers in "real-time" by constructing an EMA-based z-score using
* only non-outlier values.
*
* x_ Univariate time series
* n_ Number of periods to use for the EMA
* thresh_ Threshold for z-score
*/
SEXP detect_outliers_ema (SEXP x_, SEXP n_, SEXP thresh_)
{
int P = 0;
if (ncols(x_) > 1)
error("ncol(x) > 1; input must be univariate");
int nr = nrows(x_); /* Input object length */
int n = asInteger(n_);
if (n < 1 || n > nr)
error("Invalid n; must be n > 1 and n < nrow(x)");
/* ensure that 'x' is double */
if (TYPEOF(x_) != REALSXP) {
x_ = PROTECT(coerceVector(x_, REALSXP)); P++;
}
/* Initalize result R object */
SEXP result_ = PROTECT(allocVector(REALSXP, nr)); P++;
/* Pointers to function arguments */
double *x = REAL(x_);
double *result = REAL(result_);
int beg = n - 1;
double ema1 = result[beg]; /* Raw mean to start EMA */
/* Find first non-NA input value */
for (int i = 0; i <= beg; i++) {
/* Account for leading NAs in input */
if (ISNA(x[i])) {
result[i] = NA_REAL;
beg++;
ema1 = result[beg];
continue;
}
/* Set output to input */
if (i < beg) {
result[i] = x[i];
}
ema1 += x[i] / n;
}
/* Calculation:
* mu[i] = EMA(x, n)
* dev[i] = x[i] - mu[i-1]
* var[i] = EMA(dev^2, n)
*
* zscore[i] = abs(dev[i]) / sqrt(var[i-1])
*/
/* Loop over non-NA input values */
double thresh = asReal(thresh_);
double ratio = 2.0 / (n + 1.0);
double ratio1 = 1.0 - ratio;
double emv1 = pow(x[beg] - ema1, 2);
result[beg] = x[beg];
for (int i = beg + 1; i < nr; i++) {
double score = abs(x[i] - ema1) / sqrt(emv1);
if (score < thresh) {
result[i] = x[i];
ema1 = x[i] * ratio + ema1 * ratio1;
emv1 = pow(x[i] - ema1, 2) * ratio + emv1 * ratio1;
} else {
result[i] = NA_REAL;
}
}
UNPROTECT(P);
return result_;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment