Created
December 14, 2020 09:30
-
-
Save pluser/a979a18fe0af59a39f11bc2986223cb4 to your computer and use it in GitHub Desktop.
Compute entropy of 1-dimensional array of time series data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- mode: julia-mode -*- | |
if abspath(PROGRAM_FILE) == @__FILE__ | |
using ArgParse | |
using HDF5 | |
using CSV | |
end | |
using Memoize | |
#= | |
Measure the distance of specified vector. | |
data is the time series 1 dimensional data, | |
m is dimension of embedding dimension, | |
idx and jdx is the first element of vector to compair. | |
=# | |
function dist(data::AbstractVector{T}, m::Int, idx::Int, jdx::Int)::T where{T<:AbstractFloat} | |
return maximum(abs.(data[idx:idx+m-1] .- data[jdx:jdx+m-1])) | |
end | |
""" | |
within_array(data, dimension, tolerance, index) | |
Consider a vector sequence of m-dimensional embeddings | |
from 1-dimensional data. It determine whether a point | |
inside the radius r of the idxth point can be contained, | |
and return the result as an boolean array. | |
The result always contains one or more true values | |
because it includes itself. | |
The implementation uses the fact that as the dimension increases, | |
the distances of the points are getting wider. | |
""" | |
@memoize function within_array(data::AbstractVector{T}, m::Int, r::T, idx::Int)::Vector{Bool} where{T<:AbstractFloat} | |
if m > 1 | |
N::Int = length(data) - m + 1 | |
need_calc = copy(within_array(data, m-1, r, idx)) | |
for i in 1:N | |
if need_calc[i] | |
need_calc[i] = abs(data[i+m-1] - data[idx+m-1]) <= r | |
end | |
end | |
resize!(need_calc, N) | |
return need_calc | |
else | |
return abs.(data .- data[idx]) .<= r | |
end | |
end | |
#= | |
Count number of vectors which is matched with template. | |
As of including index vector itself, return value is always one or more. | |
data is the time series 1 dimensional data, | |
m is dimension of embedding dimension, | |
r is tolerance for counting. | |
=# | |
function count_matching(data::AbstractVector{<:AbstractFloat}, m::Int, r::AbstractFloat, idx::Int)::Int | |
N::Int = length(data) - m + 1 | |
function naive_method()::Int | |
counter::Int = 0 | |
for j in 1:N | |
if dist(data, m, idx, j) <= r | |
counter += 1 | |
end | |
end | |
return counter | |
end | |
function opt_method()::Int | |
return count(within_array(data, m, r, idx)) | |
end | |
#@assert naive_method() == opt_method() | |
return opt_method() | |
end | |
#= | |
Sample Entropy. | |
The parameters L, m, and r must be fixed for each calculation. | |
L is the time series data, m is the length of | |
sequences to be compared, and r is the tolerance for | |
accepting matches. | |
Implement is based on https://doi.org/10.1152/ajpheart.2000.278.6.H2039 | |
=# | |
function sampen(L::AbstractVector{<:AbstractFloat}, m::Int, r::AbstractFloat) | |
N::Int = length(L) | |
#tmpL = @view L[1:N-1] | |
bimr_n = (i) -> begin | |
count_matching(L, m, r, i) - 1 | |
end | |
bmr_n = () -> begin | |
sum::Int = 0 | |
for i in 1:N-m | |
sum += bimr_n(i) | |
end | |
# the last vector should not be counted. | |
sum -= 2*count_matching(L, m, r, N - m + 1) | |
return sum | |
end | |
aimr_n = (i) -> count_matching(L, m + 1, r, i) - 1 | |
amr_n = () -> begin | |
sum::Int = 0 | |
for i in 1:N-m | |
sum += aimr_n(i) | |
end | |
return sum | |
end | |
return -log(amr_n()/bmr_n()) | |
end | |
#= | |
Approximate Entropy. | |
The parameters N, m, and r must be fixed for each calculation. | |
N is the length of the time series, m is the length of | |
sequences to be compared, and r is the tolerance for | |
accepting matches. | |
Implement is based on https://doi.org/10.1007/BF01619355 | |
=# | |
function apen(L::Array{<:AbstractFloat,1}, m::Int, r::AbstractFloat) | |
N::Int = length(L) | |
function cimr(m, idx) | |
return count_matching(L, m, r, idx) / (N - m + 1) | |
end | |
function phi(m, r) | |
sum::Float32 = 0 | |
for i in 1 : (N - m + 1) | |
sum += log(cimr(m, i)) | |
end | |
return sum / (N - m + 1) | |
end | |
return phi(m, r) - phi(m + 1, r) | |
end | |
#= | |
Kolmogorov Sinai Entropy. | |
KS-entropy is identical as taking limits of Approximate Entropy. | |
The parameters N, m, and r must be fixed for each calculation. | |
N is the length of the time series, m is the length of | |
sequences to be compared, and r is the tolerance for | |
accepting matches. | |
Implement is based on https://doi.org/10.1016/0370-1573(93)90012-3 | |
=# | |
function ksen(L::Array{<:AbstractFloat,1}, m::Int, r::AbstractFloat) | |
error("Not Implemented") | |
end | |
if abspath(PROGRAM_FILE) == @__FILE__ | |
function cmd_entrypoint() | |
method_map = Dict([("sampen", sampen), ("apen", apen), ("ksen", ksen)]) | |
s = ArgParseSettings() | |
s.description = "A calculator of Sample/Approximate/KS entropy" | |
s.version = "1.0" | |
add_arg_table( | |
s, | |
["--input", "-i"], | |
Dict( | |
:help => "filename for the time series data.\nspecial arg '-' means read from stdin", | |
:arg_type => String, | |
:required => true | |
)) | |
add_arg_table( | |
s, | |
["--method", "-w"], | |
Dict( | |
:help => "method to calculate entropy.\nit should be one of 'sampen', 'apen', 'ksen'.", | |
:arg_type => String, | |
:range_tester => (x -> x in keys(method_map)), | |
:required => true | |
)) | |
add_arg_table( | |
s, | |
["--dimension", "--dim", "-d", "-m"], | |
Dict( | |
:help => "dimension for embedding", | |
:arg_type => Int, | |
:required => true | |
)) | |
add_arg_table( | |
s, | |
["--tolerance", "--tor", "-r"], | |
Dict( | |
:help => "tolerance for matching vertors", | |
:arg_type => Float32, | |
:required => true | |
)) | |
args = parse_args(s) | |
if args["input"] != "-" | |
# read from file | |
filename = args["input"] | |
h5open(filename, "r") do fd | |
tsdata = read(fd, "tsdata") | |
end | |
else | |
# read from stdin | |
tsdata = parse.(Float32, split(readline(stdin), (' ', ','))) | |
end | |
func = method_map[args["method"]] | |
dim = args["dimension"] | |
tol = args["tolerance"] | |
result = func(tsdata, dim, tol) | |
println(result) | |
end | |
cmd_entrypoint() | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment