Skip to content

Instantly share code, notes, and snippets.

@jmert

jmert/lttb.jl Secret

Last active September 28, 2025 17:46
Show Gist options
  • Select an option

  • Save jmert/4e1061bb42be80a4e517fc815b83f1bc to your computer and use it in GitHub Desktop.

Select an option

Save jmert/4e1061bb42be80a4e517fc815b83f1bc to your computer and use it in GitHub Desktop.
The Largest Triangle, Three-Buckets timestream reduction algorithm
using Statistics: mean
"""
x, y = lttb(v::AbstractVector, n = length(v)÷10)
The largest triangle, three-buckets reduction of the vector `v` over points `1:N` to a
new, shorter vector `y` at `x` with `length(x) == n`.
See https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
"""
function lttb(v::AbstractVector, n = length(v)÷10)
w = similar(v, n)
z = similar(w, Int)
N = length(v)
N == 0 && return (z, w)
# always take the first and last data point
@inbounds begin
w[1] = y₀ = v[1]
w[n] = v[N]
z[1] = x₀ = 1
z[n] = N
end
# split original vector into buckets of equal length (excluding two endpoints)
# - s[ii] is the inclusive lower edge of the bin
s = range(2, N, length = n-1)
@inline lower(k) = round(Int, s[k])
@inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1
@inline binrange(k) = lower(k):upper(k)
# then for each bin
@inbounds for ii in 1:n-2
# calculate the mean of the next bin to use as a fixed end of the triangle
r = binrange(ii+1)
x₂ = mean(r)
y₂ = sum(@view v[r]) / length(r)
# then for each point in this bin, calculate the area of the triangle, keeping
# track of the maximum
r = binrange(ii)
x̂, ŷ, Â = first(r), v[first(r)], typemin(y₀)
for jj in r
x₁, y₁ = jj, v[jj]
# triangle area:
A = abs(x₀*(y₁-y₂) + x₁*(y₂-y₀) + x₂*(y₀-y₁)) / 2
# update coordinate if area is larger
if A >
x̂, ŷ, Â = x₁, y₁, A
end
x₀, y₀ = x₁, y₁
end
z[ii+1] =
w[ii+1] =
end
return (z, w)
end
@atthom
Copy link

atthom commented Feb 28, 2023

Hey, this algorithm looks interesting. Do you mind if I include it in a package like InteractiveViz.jl?

@jmert
Copy link
Author

jmert commented Mar 5, 2023

@atthom No objections. It was a relatively straightforward translation of the thesis cited in the docstring, so I can't really claim any ownership over it. If you want me to, I could submit a simple PR (as long as you kind of describe where/how you want this added).

@gbambini
Copy link

Not sure if it is valid/correct or usuful, but would like to drop this too:

"""
    x, y = lttb(v::AbstractVector, n)

The largest triangle, three-buckets reduction of the vector `v` over points `1:N` to a
new, shorter vector `y` at `x` with `length(x) == n`.

See https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
"""
function lttb(v::AbstractVector, n::Integer)
    N = length(v)
    N == 0 && return (Int[], similar(v, 0))

    w = similar(v, n)
    z = similar(w, Int)

    # always take the first and last data point
    @inbounds begin
        w[1] = y₀ = v[1]
        w[n] = v[N]
        z[1] = x₀ = 1
        z[n] = N
    end

    # split original vector into buckets of equal length (excluding two endpoints)
    #   - s[ii] is the inclusive lower edge of the bin
    s = range(2, N, length = n-1)
    @inline lower(k) = round(Int, s[k])
    @inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1
    @inline binrange(k) = lower(k):upper(k)

    # then for each bin
    @inbounds for ii in 1:n-2
        # calculate the mean of the next bin to use as a fixed end of the triangle
        r = binrange(ii+1)
        x₂ = mean(r)
        y₂ = sum(@view v[r]) / length(r)

        # then for each point in this bin, calculate the area of the triangle, keeping
        # track of the maximum
        r = binrange(ii)
        x̂, ŷ, Â = first(r), v[first(r)], typemin(y₀)
        for jj in r
            x₁, y₁ = jj, v[jj]
            # triangle area:
            A = abs(x₀*(y₁-y₂) + x₁*(y₂-y₀) + x₂*(y₀-y₁)) / 2
            # update coordinate if area is larger
            if A > Â
                x̂, ŷ, Â = x₁, y₁, A
            end
            x₀, y₀ = x₁, y₁
        end
        z[ii+1] = x̂
        w[ii+1] =end

    return (z, w)
end

"""
    x, Y = lttb(V::AbstractMatrix, n)

The largest triangle, three-buckets reduction of a matrix `V` (where each row is a
series) over points `1:N` to a new, shorter matrix `Y` at indices `x`.

This version preserves features by selecting the point in each bucket that creates the
largest triangle area in *any* of the rows.

Example:
```Julia
using Plots # For visualization

# Generate some data
N = 2000
x_full = 1:N
series1 = 5 .* sin.(x_full ./ 50) .+ randn(N) .* 0.5
series2 = zeros(N)
series2[1000:1010] .= [0, 2, 8, 15, 8, 2, 0, 0, 0, 0, 0] # A sharp, isolated peak
series3 = cos.(x_full ./ 20) .* (x_full ./ 200)

# Combine into a matrix (each series is a row)
V = vcat(series1', series2', series3')

# Downsample from 2000 points to 150
n_downsampled = 150
x_new, W_new = lttb(V, n_downsampled)

# --- Plotting to see the result ---
p1 = plot(x_full, V[1,:], label="Original Series 1", color=:lightblue, legend=:topleft)
plot!(p1, x_new, W_new[1,:], label="LTTB Series 1", color=:blue, lw=2, markershape=:circle, ms=3)

p2 = plot(x_full, V[2,:], label="Original Series 2", color=:lightcoral, legend=:topleft)
plot!(p2, x_new, W_new[2,:], label="LTTB Series 2", color=:red, lw=2, markershape=:circle, ms=3)

p3 = plot(x_full, V[3,:], label="Original Series 3", color=:lightgreen, legend=:topleft)
plot!(p3, x_new, W_new[3,:], label="LTTB Series 3", color=:green, lw=2, markershape=:circle, ms=3)

plot(p1, p2, p3, layout=(3,1), size=(800, 600))
```
"""
function lttb(V::AbstractMatrix, n::Integer)
    M, N = size(V) # M = number of series, N = number of points
    N == 0 && return (Int[], similar(V, M, 0))

    W = similar(V, M, n) # Output matrix for y-values
    z = Vector{Int}(undef, n) # Output vector for x-indices

    # Always take the first and last data points for all series
    @inbounds begin
        W[:, 1] = V[:, 1]
        W[:, n] = V[:, N]
        z[1] = 1
        z[n] = N
    end

    # Bucket setup is identical to the vector version
    s = range(2, N, length = n-1)
    @inline lower(k) = round(Int, s[k])
    @inline upper(k) = k+1 < n ? round(Int, s[k+1]) : N-1
    @inline binrange(k) = lower(k):upper(k)

    # Keep track of the last selected point (x₀, Y₀ vector)
    x₀ = z[1]
    Y₀ = W[:, 1] # This is now a vector of y-values

    @inbounds for ii in 1:n-2
        # Calculate the mean of the *next* bin for each series
        r_next = binrange(ii+1)
        x₂ = mean(r_next)
        # Calculate mean along dimension 2 (the points) for each series (row)
        Y₂ = mean(@view(V[:, r_next]), dims=2)

        # Find the point in the *current* bin with the max area across all series
        r_current = binrange(ii)
        Â = -1.0 # Max area found so far in this bucket= first(r_current) # Best x-index found so far

        for jj in r_current
            x₁ = jj
            Y₁ = @view V[:, jj] # Candidate point's y-values (a vector)
            
            # Find the max area this point produces across all series
            current_max_area = 0.0
            for m in 1:M # Iterate through each series (row)
                A = abs(x₀*(Y₁[m]-Y₂[m]) + x₁*(Y₂[m]-Y₀[m]) + x₂*(Y₀[m]-Y₁[m])) / 2
                if A > current_max_area
                    current_max_area = A
                end
            end

            # If this point's max area is the best we've seen in this bucket, keep it
            if current_max_area > Â
                Â = current_max_area
                x̂ = x₁
            end
        end
        
        # Store the winning point and update the last selected point
        z[ii+1] = x̂
        W[:, ii+1] = V[:, x̂]
        x₀ = x̂
        Y₀ = W[:, ii+1]
    end

    return (z, W)
end

Main changes:

  • line N == 0 && return (Int[], similar(v, 0)) to have a consistent output
  • lttb for multiseries input

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment