Compression Test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
compressionTest <- function(code, years = 7, algo = "g") { | |
# The generic Quandl API key for TuringFinance. | |
Quandl.api_key("t6Rn1d5N1W6Qt4jJq_zC") | |
# Download the raw price data. | |
data <- Quandl(code, rows = -1, type = "xts") | |
# Extract the variable we are interested in. | |
ix.ac <- which(colnames(data) == "Adjusted Close") | |
if (length(ix.ac) == 0) | |
ix.ac <- which(colnames(data) == "Close") | |
ix.rate <- which(colnames(data) == "Rate") | |
closes <- data[ ,max(ix.ac, ix.rate)] | |
# Get the month endpoints. | |
monthends <- endpoints(closes) | |
monthends <- monthends[2:length(monthends) - 1] | |
# Observed compression ratios. | |
cratios <- c() | |
for (t in ((12 * years) + 1):length(monthends)) { | |
# Extract a window of length equal to years. | |
window <- closes[monthends[t - (12 * years)]:monthends[t]] | |
# Compute detrended log returns. | |
returns <- Return.calculate(window, method = "log") | |
returns <- na.omit(returns) - mean(returns, na.rm = T) | |
# Binarize the returns. | |
returns[returns < 0] <- 0 | |
returns[returns > 0] <- 1 | |
# Convert into raw hexadecimal. | |
hexrets <- bin2rawhex(returns) | |
# Compute the compression ratio | |
cratios <- c(cratios, length(memCompress(hexrets)) / | |
length(hexrets)) | |
} | |
# Expected compression ratios. | |
ecratios <- c() | |
for (i in 1:length(cratios)) { | |
# Generate some benchmark returns. | |
returns <- rnorm(252 * years) | |
# Binarize the returns. | |
returns[returns < 0] <- 0 | |
returns[returns > 0] <- 1 | |
# Convert into raw hexadecimal. | |
hexrets <- bin2rawhex(returns) | |
# Compute the compression ratio | |
ecratios <- c(ecratios, length(memCompress(hexrets)) / | |
length(hexrets)) | |
} | |
if (mean(cratios) >= min(1.0, mean(ecratios))) { | |
print(paste("Dataset:", code, "is not compressible { c =", | |
mean(cratios), "} --> efficient.")) | |
} else { | |
print(paste("Dataset:", code, "is compressible { c =", | |
mean(cratios), "} --> inefficient.")) | |
} | |
} | |
bin2rawhex <- function(bindata) { | |
bindata <- as.numeric(as.vector(bindata)) | |
lbindata <- split(bindata, ceiling(seq_along(bindata)/4)) | |
hexdata <- as.vector(unlist(mclapply(lbindata, bin2hex))) | |
hexdata <- paste(hexdata, sep = "", collapse = "") | |
hexdata <- substring(hexdata, | |
seq(1, nchar(hexdata), 2), | |
seq(2, nchar(hexdata), 2)) | |
return(as.raw(as.hexmode(hexdata))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment