Skip to content

Instantly share code, notes, and snippets.

@StuartGordonReid
Created April 16, 2016 20:17
Show Gist options
  • Save StuartGordonReid/02ecb544dd23c4147da5093cc84f39c7 to your computer and use it in GitHub Desktop.
Save StuartGordonReid/02ecb544dd23c4147da5093cc84f39c7 to your computer and use it in GitHub Desktop.
Compression Test
compressionTest <- function(code, years = 7, algo = "g") {
# The generic Quandl API key for TuringFinance.
Quandl.api_key("t6Rn1d5N1W6Qt4jJq_zC")
# Download the raw price data.
data <- Quandl(code, rows = -1, type = "xts")
# Extract the variable we are interested in.
ix.ac <- which(colnames(data) == "Adjusted Close")
if (length(ix.ac) == 0)
ix.ac <- which(colnames(data) == "Close")
ix.rate <- which(colnames(data) == "Rate")
closes <- data[ ,max(ix.ac, ix.rate)]
# Get the month endpoints.
monthends <- endpoints(closes)
monthends <- monthends[2:length(monthends) - 1]
# Observed compression ratios.
cratios <- c()
for (t in ((12 * years) + 1):length(monthends)) {
# Extract a window of length equal to years.
window <- closes[monthends[t - (12 * years)]:monthends[t]]
# Compute detrended log returns.
returns <- Return.calculate(window, method = "log")
returns <- na.omit(returns) - mean(returns, na.rm = T)
# Binarize the returns.
returns[returns < 0] <- 0
returns[returns > 0] <- 1
# Convert into raw hexadecimal.
hexrets <- bin2rawhex(returns)
# Compute the compression ratio
cratios <- c(cratios, length(memCompress(hexrets)) /
length(hexrets))
}
# Expected compression ratios.
ecratios <- c()
for (i in 1:length(cratios)) {
# Generate some benchmark returns.
returns <- rnorm(252 * years)
# Binarize the returns.
returns[returns < 0] <- 0
returns[returns > 0] <- 1
# Convert into raw hexadecimal.
hexrets <- bin2rawhex(returns)
# Compute the compression ratio
ecratios <- c(ecratios, length(memCompress(hexrets)) /
length(hexrets))
}
if (mean(cratios) >= min(1.0, mean(ecratios))) {
print(paste("Dataset:", code, "is not compressible { c =",
mean(cratios), "} --> efficient."))
} else {
print(paste("Dataset:", code, "is compressible { c =",
mean(cratios), "} --> inefficient."))
}
}
bin2rawhex <- function(bindata) {
bindata <- as.numeric(as.vector(bindata))
lbindata <- split(bindata, ceiling(seq_along(bindata)/4))
hexdata <- as.vector(unlist(mclapply(lbindata, bin2hex)))
hexdata <- paste(hexdata, sep = "", collapse = "")
hexdata <- substring(hexdata,
seq(1, nchar(hexdata), 2),
seq(2, nchar(hexdata), 2))
return(as.raw(as.hexmode(hexdata)))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment