-
-
Save jmert/4e1061bb42be80a4e517fc815b83f1bc to your computer and use it in GitHub Desktop.
The Largest Triangle, Three-Buckets timestream reduction algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using Statistics: mean | |
| """ | |
| x, y = lttb(v::AbstractVector, n = length(v)÷10) | |
| The largest triangle, three-buckets reduction of the vector `v` over points `1:N` to a | |
| new, shorter vector `y` at `x` with `length(x) == n`. | |
| See https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf | |
| """ | |
| function lttb(v::AbstractVector, n = length(v)÷10) | |
| w = similar(v, n) | |
| z = similar(w, Int) | |
| N = length(v) | |
| N == 0 && return (z, w) | |
| # always take the first and last data point | |
| @inbounds begin | |
| w[1] = y₀ = v[1] | |
| w[n] = v[N] | |
| z[1] = x₀ = 1 | |
| z[n] = N | |
| end | |
| # split original vector into buckets of equal length (excluding two endpoints) | |
| # - s[ii] is the inclusive lower edge of the bin | |
| s = range(2, N, length = n-1) | |
| @inline lower(k) = round(Int, s[k]) | |
| @inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1 | |
| @inline binrange(k) = lower(k):upper(k) | |
| # then for each bin | |
| @inbounds for ii in 1:n-2 | |
| # calculate the mean of the next bin to use as a fixed end of the triangle | |
| r = binrange(ii+1) | |
| x₂ = mean(r) | |
| y₂ = sum(@view v[r]) / length(r) | |
| # then for each point in this bin, calculate the area of the triangle, keeping | |
| # track of the maximum | |
| r = binrange(ii) | |
| x̂, ŷ, Â = first(r), v[first(r)], typemin(y₀) | |
| for jj in r | |
| x₁, y₁ = jj, v[jj] | |
| # triangle area: | |
| A = abs(x₀*(y₁-y₂) + x₁*(y₂-y₀) + x₂*(y₀-y₁)) / 2 | |
| # update coordinate if area is larger | |
| if A > Â | |
| x̂, ŷ, Â = x₁, y₁, A | |
| end | |
| x₀, y₀ = x₁, y₁ | |
| end | |
| z[ii+1] = x̂ | |
| w[ii+1] = ŷ | |
| end | |
| return (z, w) | |
| end |
Author
@atthom No objections. It was a relatively straightforward translation of the thesis cited in the docstring, so I can't really claim any ownership over it. If you want me to, I could submit a simple PR (as long as you kind of describe where/how you want this added).
Not sure if it is valid/correct or usuful, but would like to drop this too:
"""
x, y = lttb(v::AbstractVector, n)
The largest triangle, three-buckets reduction of the vector `v` over points `1:N` to a
new, shorter vector `y` at `x` with `length(x) == n`.
See https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
"""
function lttb(v::AbstractVector, n::Integer)
N = length(v)
N == 0 && return (Int[], similar(v, 0))
w = similar(v, n)
z = similar(w, Int)
# always take the first and last data point
@inbounds begin
w[1] = y₀ = v[1]
w[n] = v[N]
z[1] = x₀ = 1
z[n] = N
end
# split original vector into buckets of equal length (excluding two endpoints)
# - s[ii] is the inclusive lower edge of the bin
s = range(2, N, length = n-1)
@inline lower(k) = round(Int, s[k])
@inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1
@inline binrange(k) = lower(k):upper(k)
# then for each bin
@inbounds for ii in 1:n-2
# calculate the mean of the next bin to use as a fixed end of the triangle
r = binrange(ii+1)
x₂ = mean(r)
y₂ = sum(@view v[r]) / length(r)
# then for each point in this bin, calculate the area of the triangle, keeping
# track of the maximum
r = binrange(ii)
x̂, ŷ, Â = first(r), v[first(r)], typemin(y₀)
for jj in r
x₁, y₁ = jj, v[jj]
# triangle area:
A = abs(x₀*(y₁-y₂) + x₁*(y₂-y₀) + x₂*(y₀-y₁)) / 2
# update coordinate if area is larger
if A > Â
x̂, ŷ, Â = x₁, y₁, A
end
x₀, y₀ = x₁, y₁
end
z[ii+1] = x̂
w[ii+1] = ŷ
end
return (z, w)
end
"""
x, Y = lttb(V::AbstractMatrix, n)
The largest triangle, three-buckets reduction of a matrix `V` (where each row is a
series) over points `1:N` to a new, shorter matrix `Y` at indices `x`.
This version preserves features by selecting the point in each bucket that creates the
largest triangle area in *any* of the rows.
Example:
```Julia
using Plots # For visualization
# Generate some data
N = 2000
x_full = 1:N
series1 = 5 .* sin.(x_full ./ 50) .+ randn(N) .* 0.5
series2 = zeros(N)
series2[1000:1010] .= [0, 2, 8, 15, 8, 2, 0, 0, 0, 0, 0] # A sharp, isolated peak
series3 = cos.(x_full ./ 20) .* (x_full ./ 200)
# Combine into a matrix (each series is a row)
V = vcat(series1', series2', series3')
# Downsample from 2000 points to 150
n_downsampled = 150
x_new, W_new = lttb(V, n_downsampled)
# --- Plotting to see the result ---
p1 = plot(x_full, V[1,:], label="Original Series 1", color=:lightblue, legend=:topleft)
plot!(p1, x_new, W_new[1,:], label="LTTB Series 1", color=:blue, lw=2, markershape=:circle, ms=3)
p2 = plot(x_full, V[2,:], label="Original Series 2", color=:lightcoral, legend=:topleft)
plot!(p2, x_new, W_new[2,:], label="LTTB Series 2", color=:red, lw=2, markershape=:circle, ms=3)
p3 = plot(x_full, V[3,:], label="Original Series 3", color=:lightgreen, legend=:topleft)
plot!(p3, x_new, W_new[3,:], label="LTTB Series 3", color=:green, lw=2, markershape=:circle, ms=3)
plot(p1, p2, p3, layout=(3,1), size=(800, 600))
```
"""
function lttb(V::AbstractMatrix, n::Integer)
M, N = size(V) # M = number of series, N = number of points
N == 0 && return (Int[], similar(V, M, 0))
W = similar(V, M, n) # Output matrix for y-values
z = Vector{Int}(undef, n) # Output vector for x-indices
# Always take the first and last data points for all series
@inbounds begin
W[:, 1] = V[:, 1]
W[:, n] = V[:, N]
z[1] = 1
z[n] = N
end
# Bucket setup is identical to the vector version
s = range(2, N, length = n-1)
@inline lower(k) = round(Int, s[k])
@inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1
@inline binrange(k) = lower(k):upper(k)
# Keep track of the last selected point (x₀, Y₀ vector)
x₀ = z[1]
Y₀ = W[:, 1] # This is now a vector of y-values
@inbounds for ii in 1:n-2
# Calculate the mean of the *next* bin for each series
r_next = binrange(ii+1)
x₂ = mean(r_next)
# Calculate mean along dimension 2 (the points) for each series (row)
Y₂ = mean(@view(V[:, r_next]), dims=2)
# Find the point in the *current* bin with the max area across all series
r_current = binrange(ii)
 = -1.0 # Max area found so far in this bucket
x̂ = first(r_current) # Best x-index found so far
for jj in r_current
x₁ = jj
Y₁ = @view V[:, jj] # Candidate point's y-values (a vector)
# Find the max area this point produces across all series
current_max_area = 0.0
for m in 1:M # Iterate through each series (row)
A = abs(x₀*(Y₁[m]-Y₂[m]) + x₁*(Y₂[m]-Y₀[m]) + x₂*(Y₀[m]-Y₁[m])) / 2
if A > current_max_area
current_max_area = A
end
end
# If this point's max area is the best we've seen in this bucket, keep it
if current_max_area > Â
 = current_max_area
x̂ = x₁
end
end
# Store the winning point and update the last selected point
z[ii+1] = x̂
W[:, ii+1] = V[:, x̂]
x₀ = x̂
Y₀ = W[:, ii+1]
end
return (z, W)
endMain changes:
- line
N == 0 && return (Int[], similar(v, 0))to have a consistent output - lttb for multiseries input
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, this algorithm looks interesting. Do you mind if I include it in a package like InteractiveViz.jl?