Skip to content

Instantly share code, notes, and snippets.

@pluser
Created December 14, 2020 09:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pluser/a979a18fe0af59a39f11bc2986223cb4 to your computer and use it in GitHub Desktop.
Save pluser/a979a18fe0af59a39f11bc2986223cb4 to your computer and use it in GitHub Desktop.
Compute entropy of 1-dimensional array of time series data
#-*- mode: julia-mode -*-
if abspath(PROGRAM_FILE) == @__FILE__
using ArgParse
using HDF5
using CSV
end
using Memoize
#=
Measure the distance of specified vector.
data is the time series 1 dimensional data,
m is dimension of embedding dimension,
idx and jdx is the first element of vector to compair.
=#
function dist(data::AbstractVector{T}, m::Int, idx::Int, jdx::Int)::T where{T<:AbstractFloat}
return maximum(abs.(data[idx:idx+m-1] .- data[jdx:jdx+m-1]))
end
"""
within_array(data, dimension, tolerance, index)
Consider a vector sequence of m-dimensional embeddings
from 1-dimensional data. It determine whether a point
inside the radius r of the idxth point can be contained,
and return the result as an boolean array.
The result always contains one or more true values
because it includes itself.
The implementation uses the fact that as the dimension increases,
the distances of the points are getting wider.
"""
@memoize function within_array(data::AbstractVector{T}, m::Int, r::T, idx::Int)::Vector{Bool} where{T<:AbstractFloat}
if m > 1
N::Int = length(data) - m + 1
need_calc = copy(within_array(data, m-1, r, idx))
for i in 1:N
if need_calc[i]
need_calc[i] = abs(data[i+m-1] - data[idx+m-1]) <= r
end
end
resize!(need_calc, N)
return need_calc
else
return abs.(data .- data[idx]) .<= r
end
end
#=
Count number of vectors which is matched with template.
As of including index vector itself, return value is always one or more.
data is the time series 1 dimensional data,
m is dimension of embedding dimension,
r is tolerance for counting.
=#
function count_matching(data::AbstractVector{<:AbstractFloat}, m::Int, r::AbstractFloat, idx::Int)::Int
N::Int = length(data) - m + 1
function naive_method()::Int
counter::Int = 0
for j in 1:N
if dist(data, m, idx, j) <= r
counter += 1
end
end
return counter
end
function opt_method()::Int
return count(within_array(data, m, r, idx))
end
#@assert naive_method() == opt_method()
return opt_method()
end
#=
Sample Entropy.
The parameters L, m, and r must be fixed for each calculation.
L is the time series data, m is the length of
sequences to be compared, and r is the tolerance for
accepting matches.
Implement is based on https://doi.org/10.1152/ajpheart.2000.278.6.H2039
=#
function sampen(L::AbstractVector{<:AbstractFloat}, m::Int, r::AbstractFloat)
N::Int = length(L)
#tmpL = @view L[1:N-1]
bimr_n = (i) -> begin
count_matching(L, m, r, i) - 1
end
bmr_n = () -> begin
sum::Int = 0
for i in 1:N-m
sum += bimr_n(i)
end
# the last vector should not be counted.
sum -= 2*count_matching(L, m, r, N - m + 1)
return sum
end
aimr_n = (i) -> count_matching(L, m + 1, r, i) - 1
amr_n = () -> begin
sum::Int = 0
for i in 1:N-m
sum += aimr_n(i)
end
return sum
end
return -log(amr_n()/bmr_n())
end
#=
Approximate Entropy.
The parameters N, m, and r must be fixed for each calculation.
N is the length of the time series, m is the length of
sequences to be compared, and r is the tolerance for
accepting matches.
Implement is based on https://doi.org/10.1007/BF01619355
=#
function apen(L::Array{<:AbstractFloat,1}, m::Int, r::AbstractFloat)
N::Int = length(L)
function cimr(m, idx)
return count_matching(L, m, r, idx) / (N - m + 1)
end
function phi(m, r)
sum::Float32 = 0
for i in 1 : (N - m + 1)
sum += log(cimr(m, i))
end
return sum / (N - m + 1)
end
return phi(m, r) - phi(m + 1, r)
end
#=
Kolmogorov Sinai Entropy.
KS-entropy is identical as taking limits of Approximate Entropy.
The parameters N, m, and r must be fixed for each calculation.
N is the length of the time series, m is the length of
sequences to be compared, and r is the tolerance for
accepting matches.
Implement is based on https://doi.org/10.1016/0370-1573(93)90012-3
=#
function ksen(L::Array{<:AbstractFloat,1}, m::Int, r::AbstractFloat)
error("Not Implemented")
end
if abspath(PROGRAM_FILE) == @__FILE__
function cmd_entrypoint()
method_map = Dict([("sampen", sampen), ("apen", apen), ("ksen", ksen)])
s = ArgParseSettings()
s.description = "A calculator of Sample/Approximate/KS entropy"
s.version = "1.0"
add_arg_table(
s,
["--input", "-i"],
Dict(
:help => "filename for the time series data.\nspecial arg '-' means read from stdin",
:arg_type => String,
:required => true
))
add_arg_table(
s,
["--method", "-w"],
Dict(
:help => "method to calculate entropy.\nit should be one of 'sampen', 'apen', 'ksen'.",
:arg_type => String,
:range_tester => (x -> x in keys(method_map)),
:required => true
))
add_arg_table(
s,
["--dimension", "--dim", "-d", "-m"],
Dict(
:help => "dimension for embedding",
:arg_type => Int,
:required => true
))
add_arg_table(
s,
["--tolerance", "--tor", "-r"],
Dict(
:help => "tolerance for matching vertors",
:arg_type => Float32,
:required => true
))
args = parse_args(s)
if args["input"] != "-"
# read from file
filename = args["input"]
h5open(filename, "r") do fd
tsdata = read(fd, "tsdata")
end
else
# read from stdin
tsdata = parse.(Float32, split(readline(stdin), (' ', ',')))
end
func = method_map[args["method"]]
dim = args["dimension"]
tol = args["tolerance"]
result = func(tsdata, dim, tol)
println(result)
end
cmd_entrypoint()
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment