Skip to content

Instantly share code, notes, and snippets.

@RomanKotov
Last active March 17, 2024 17:21
Show Gist options
  • Save RomanKotov/c93793109afc7476adaa70a292a84b6e to your computer and use it in GitHub Desktop.
Save RomanKotov/c93793109afc7476adaa70a292a84b6e to your computer and use it in GitHub Desktop.
Compare different_approaches to build ranges of consecutive items https://x.com/chgeuer/status/1768027992691781757
Mix.install([
{:benchee, "~> 1.3"}
])
defmodule TailRecursive do
defstruct from: nil, to: nil
def identify([], _), do: []
def identify([item], _), do: %__MODULE__{from: item, to: item}
def identify(v, is_next) when is_list(v) do
v
|> Enum.sort()
|> Enum.dedup()
|> impl([{}], is_next)
end
defp impl([], a, _), do: reverse(a, [])
defp impl([current | tail], [{from, to} | acc], is_next) do
if is_next.(to, current) do
impl(tail, [{from, current} | acc], is_next)
else
impl(tail, [{current, current}, {from, to} | acc], is_next)
end
end
defp impl([current | tail], [{} | acc], is_next) do
impl(tail, [{current, current} | acc], is_next)
end
defp reverse([], a), do: a
defp reverse([{from, to} | tail], a) do
reverse(tail, [%__MODULE__{from: from, to: to} | a])
end
end
defmodule TailSinglePass do
defstruct from: nil, to: nil
def identify([], _), do: []
def identify([item], _), do: %__MODULE__{from: item, to: item}
def identify(v, is_next) when is_list(v) do
v
|> Enum.sort(:desc)
|> then(fn [max | rest] -> impl(rest, [{max, max}], is_next) end)
end
def impl([], [{from, to} | acc], _), do: [%__MODULE__{from: from, to: to} | acc]
def impl([item, item | rest], acc, is_next), do: impl([item | rest], acc, is_next)
def impl([previous | tail], [{from, to} | acc], is_next) do
if is_next.(previous, from) do
impl(tail, [{previous, to} | acc], is_next)
else
impl(tail, [{previous, previous}, %__MODULE__{from: from, to: to} | acc], is_next)
end
end
end
defmodule RecursiveEnumerate do
defstruct from: nil, to: nil
def identify([], _), do: []
def identify([item], _), do: %__MODULE__{from: item, to: item}
def identify(v, is_next) when is_list(v) do
v
|> Enum.sort()
|> Enum.dedup()
|> impl(is_next)
end
defp impl([], _), do: []
defp impl([x], _), do: [%__MODULE__{from: x, to: x}]
defp impl([current | tail], is_next) do
{range, rest} =
%__MODULE__{from: current, to: current}
|> enumerate_sequence(tail, is_next)
[range | impl(rest, is_next)]
end
defp enumerate_sequence(range, [], _), do: {range, []}
defp enumerate_sequence(range, [next | tail], is_next) do
if is_next.(range.to, next) do
enumerate_sequence(%__MODULE__{range | to: next}, tail, is_next)
else
{range, [next | tail]}
end
end
end
defmodule Recursive do
defstruct from: nil, to: nil
def identify([], _), do: []
def identify([item], _), do: %__MODULE__{from: item, to: item}
def identify(v, is_next) do
[item | tail] =
v
|> Enum.sort()
|> Enum.dedup()
impl({item, item}, tail, is_next)
end
defp impl({from, to}, [], _), do: [%__MODULE__{from: from, to: to}]
defp impl({from, to}, [next | tail], is_next) do
if is_next.(to, next) do
impl({from, next}, tail, is_next)
else
[%__MODULE__{from: from, to: to} | impl({next, next}, tail, is_next)]
end
end
end
defmodule ZipReduce do
defstruct from: nil, to: nil
def identify([], _), do: []
def identify([item], _), do: %__MODULE__{from: item, to: item}
def identify(v, is_next) when is_list(v) and is_function(is_next, 2) do
previous_items =
v
|> Enum.sort()
|> Enum.dedup()
reduce_fn = fn
previous, current, [{from, _} = range | tail] ->
if is_next.(previous, current) do
[{from, current} | tail]
else
[{current, current}, range | tail]
end
end
[head | next_items] = previous_items
previous_items
|> Enum.zip_reduce(next_items, [{head, head}], reduce_fn)
|> reverse([])
end
defp reverse([], a), do: a
defp reverse([{from, to} | tail], a) do
reverse(tail, [%__MODULE__{from: from, to: to} | a])
end
end
defmodule IsNext do
def integer(previous, next), do: previous + 1 == next
end
ExUnit.start()
defmodule SequenceTest do
use ExUnit.Case
def original_data do
[
-1, 0, 1, 2, 3, 4, 5, 6, 7,
1999, 2000, 2001,
2010, 2011, 2012,
2014,
2020, 2021, 2022, 2023, 2024
]
end
def generate_wide_random_data(range) do
for _ <- range, do: Enum.random(-1_000_000..1_000_000)
end
def generate_narrow_random_data(range) do
for _ <- range, do: Enum.random(1..1000)
end
test "simple case" do
data = original_data()
tail_results = identify(data, TailRecursive)
tail_single_pass_results = identify(data, TailSinglePass)
recursive_enum_results = identify(data, RecursiveEnumerate)
recursive_results = identify(data, Recursive)
zip_results = identify(data, ZipReduce)
assert tail_results == tail_single_pass_results
assert tail_results == recursive_enum_results
assert tail_results == recursive_results
assert tail_results == zip_results
end
test "benchmark" do
data = generate_wide_random_data(1..1_000_000)
{tail_time, tail_results} = measure_time(data, TailRecursive)
{tail_single_pass_time, tail_single_pass_results} = measure_time(data, TailSinglePass)
{enumerate_time, enumerate_results} = measure_time(data, RecursiveEnumerate)
{recursive_time, recursive_results} = measure_time(data, Recursive)
{zip_time, zip_results} = measure_time(data, ZipReduce)
IO.inspect(%{
TailRecursive => tail_time,
TailSinglePass => tail_single_pass_time,
RecursiveEnumerate => enumerate_time,
Recursive => recursive_time,
ZipReduce => zip_time
}, label: "Example time (seconds)")
assert tail_results == tail_single_pass_results
assert tail_results == enumerate_results
assert tail_results == recursive_results
assert tail_results == zip_results
end
defp identify(data, module) do
data
|> module.identify(&IsNext.integer/2)
|> Enum.map(&{&1.from, &1.to})
end
defp measure_time(data, module) do
{time, results} = :timer.tc(fn -> identify(data, module) end)
{time / 1_000_000, results}
end
def benchmark_with_benchee do
modules = [TailRecursive, TailSinglePass, RecursiveEnumerate, Recursive, ZipReduce]
cases =
for module <- modules, into: %{} do
{
inspect(module),
fn data -> module.identify(data, &IsNext.integer/2) end
}
end
Benchee.run(
cases,
inputs: %{
"100 items, narrow" => generate_narrow_random_data(1..1_00),
"100 items, wide" => generate_wide_random_data(1..1_00),
"10000 items, narrow" => generate_narrow_random_data(1..1_000),
"10000 items, wide" => generate_wide_random_data(1..1_000),
"1000000 items, narrow" => generate_narrow_random_data(1..1_000_000),
"1000000 items, wide" => generate_wide_random_data(1..1_000_000),
"original" => original_data()
},
time: 15,
memory_time: 2
)
end
end
ExUnit.run()
SequenceTest.benchmark_with_benchee()
range = 1..1_000_000
data = for _ <- range, do: Enum.random(range)
uniq_task = Task.async(fn -> :timer.tc(fn -> data |> Enum.sort() |> Enum.uniq() end) end)
dedup_task = Task.async(fn -> :timer.tc(fn -> data |> Enum.sort() |> Enum.dedup() end) end)
{time_uniq, uniq_results} = Task.await(uniq_task)
{time_dedup, dedup_results} = Task.await(dedup_task)
results_match = uniq_results == dedup_results
dbg({results_match, time_uniq, time_dedup})
##### With input 100 items, narrow #####
Name ips average deviation median 99th %
TailSinglePass 462.80 K 2.16 μs ±772.78% 2 μs 3.13 μs
TailRecursive 407.14 K 2.46 μs ±714.89% 2.29 μs 4.58 μs
ZipReduce 385.00 K 2.60 μs ±717.94% 2.38 μs 5.50 μs
Recursive 360.37 K 2.77 μs ±670.92% 2.63 μs 3.63 μs
RecursiveEnumerate 320.11 K 3.12 μs ±637.17% 2.96 μs 4.08 μs
Comparison:
TailSinglePass 462.80 K
TailRecursive 407.14 K - 1.14x slower +0.30 μs
ZipReduce 385.00 K - 1.20x slower +0.44 μs
Recursive 360.37 K - 1.28x slower +0.61 μs
RecursiveEnumerate 320.11 K - 1.45x slower +0.96 μs
Memory usage statistics:
Name Memory usage
TailSinglePass 16.91 KB
TailRecursive 18.52 KB - 1.10x memory usage +1.61 KB
ZipReduce 18.57 KB - 1.10x memory usage +1.66 KB
Recursive 17.04 KB - 1.01x memory usage +0.125 KB
RecursiveEnumerate 17.27 KB - 1.02x memory usage +0.36 KB
**All measurements for memory usage were the same**
##### With input 100 items, wide #####
Name ips average deviation median 99th %
TailSinglePass 443.42 K 2.26 μs ±838.09% 2.08 μs 3.04 μs
TailRecursive 383.72 K 2.61 μs ±712.32% 2.42 μs 3.75 μs
ZipReduce 361.39 K 2.77 μs ±646.03% 2.58 μs 5.58 μs
Recursive 343.84 K 2.91 μs ±649.72% 2.75 μs 5 μs
RecursiveEnumerate 316.60 K 3.16 μs ±543.40% 3 μs 4.46 μs
Comparison:
TailSinglePass 443.42 K
TailRecursive 383.72 K - 1.16x slower +0.35 μs
ZipReduce 361.39 K - 1.23x slower +0.51 μs
Recursive 343.84 K - 1.29x slower +0.65 μs
RecursiveEnumerate 316.60 K - 1.40x slower +0.90 μs
Memory usage statistics:
Name Memory usage
TailSinglePass 19.61 KB
TailRecursive 21.08 KB - 1.07x memory usage +1.47 KB
ZipReduce 21.13 KB - 1.08x memory usage +1.52 KB
Recursive 19.52 KB - 1.00x memory usage -0.09375 KB
RecursiveEnumerate 19.49 KB - 0.99x memory usage -0.11719 KB
**All measurements for memory usage were the same**
##### With input 10000 items, narrow #####
Name ips average deviation median 99th %
TailSinglePass 37.24 K 26.86 μs ±23.54% 25.21 μs 39.13 μs
TailRecursive 34.72 K 28.80 μs ±24.17% 27.29 μs 42.79 μs
Recursive 34.63 K 28.88 μs ±24.64% 27.58 μs 39.54 μs
ZipReduce 34.55 K 28.94 μs ±20.17% 27.33 μs 43.00 μs
RecursiveEnumerate 28.19 K 35.47 μs ±244.41% 33.67 μs 49.83 μs
Comparison:
TailSinglePass 37.24 K
TailRecursive 34.72 K - 1.07x slower +1.94 μs
Recursive 34.63 K - 1.08x slower +2.02 μs
ZipReduce 34.55 K - 1.08x slower +2.09 μs
RecursiveEnumerate 28.19 K - 1.32x slower +8.61 μs
Memory usage statistics:
Name Memory usage
TailSinglePass 170.29 KB
TailRecursive 164.55 KB - 0.97x memory usage -5.73438 KB
Recursive 154.66 KB - 0.91x memory usage -15.62500 KB
ZipReduce 164.60 KB - 0.97x memory usage -5.68750 KB
RecursiveEnumerate 163.97 KB - 0.96x memory usage -6.32031 KB
**All measurements for memory usage were the same**
##### With input 10000 items, wide #####
Name ips average deviation median 99th %
TailSinglePass 35.06 K 28.52 μs ±35.76% 25.88 μs 63.42 μs
TailRecursive 31.49 K 31.75 μs ±46.01% 29.38 μs 63.50 μs
ZipReduce 30.78 K 32.49 μs ±25.12% 30.21 μs 64.13 μs
Recursive 28.05 K 35.65 μs ±21.57% 33.58 μs 56.54 μs
RecursiveEnumerate 25.79 K 38.78 μs ±20.00% 36.92 μs 60.00 μs
Comparison:
TailSinglePass 35.06 K
TailRecursive 31.49 K - 1.11x slower +3.23 μs
ZipReduce 30.78 K - 1.14x slower +3.97 μs
Recursive 28.05 K - 1.25x slower +7.13 μs
RecursiveEnumerate 25.79 K - 1.36x slower +10.26 μs
Memory usage statistics:
Name Memory usage
TailSinglePass 215.77 KB
TailRecursive 226.83 KB - 1.05x memory usage +11.06 KB
ZipReduce 226.88 KB - 1.05x memory usage +11.11 KB
Recursive 211.20 KB - 0.98x memory usage -4.56250 KB
RecursiveEnumerate 211.18 KB - 0.98x memory usage -4.58594 KB
**All measurements for memory usage were the same**
##### With input 1000000 items, narrow #####
Name ips average deviation median 99th %
Recursive 9.20 108.75 ms ±16.74% 99.03 ms 152.69 ms
ZipReduce 8.83 113.29 ms ±17.67% 103.79 ms 164.12 ms
RecursiveEnumerate 8.81 113.56 ms ±15.98% 101.69 ms 155.48 ms
TailRecursive 8.79 113.71 ms ±17.59% 102.30 ms 155.65 ms
TailSinglePass 8.28 120.71 ms ±9.65% 118.69 ms 160.41 ms
Comparison:
Recursive 9.20
ZipReduce 8.83 - 1.04x slower +4.54 ms
RecursiveEnumerate 8.81 - 1.04x slower +4.82 ms
TailRecursive 8.79 - 1.05x slower +4.97 ms
TailSinglePass 8.28 - 1.11x slower +11.97 ms
Memory usage statistics:
Name Memory usage
Recursive 203.13 MB
ZipReduce 203.15 MB - 1.00x memory usage +0.0153 MB
RecursiveEnumerate 203.16 MB - 1.00x memory usage +0.0229 MB
TailRecursive 203.15 MB - 1.00x memory usage +0.0153 MB
TailSinglePass 232.26 MB - 1.14x memory usage +29.12 MB
**All measurements for memory usage were the same**
##### With input 1000000 items, wide #####
Name ips average deviation median 99th %
TailSinglePass 6.99 143.04 ms ±13.87% 135.46 ms 199.42 ms
TailRecursive 6.95 143.84 ms ±7.94% 139.10 ms 191.68 ms
ZipReduce 6.91 144.70 ms ±6.22% 141.84 ms 170.99 ms
RecursiveEnumerate 6.51 153.52 ms ±7.12% 151.90 ms 194.96 ms
Recursive 6.44 155.18 ms ±13.19% 146.74 ms 200.63 ms
Comparison:
TailSinglePass 6.99
TailRecursive 6.95 - 1.01x slower +0.80 ms
ZipReduce 6.91 - 1.01x slower +1.66 ms
RecursiveEnumerate 6.51 - 1.07x slower +10.48 ms
Recursive 6.44 - 1.08x slower +12.14 ms
Memory usage statistics:
Name Memory usage
TailSinglePass 295.71 MB
TailRecursive 301.19 MB - 1.02x memory usage +5.48 MB
ZipReduce 301.19 MB - 1.02x memory usage +5.48 MB
RecursiveEnumerate 296.28 MB - 1.00x memory usage +0.56 MB
Recursive 289.18 MB - 0.98x memory usage -6.53860 MB
**All measurements for memory usage were the same**
##### With input original #####
Name ips average deviation median 99th %
TailSinglePass 2.67 M 374.10 ns ±14947.37% 250 ns 417 ns
Recursive 2.51 M 398.23 ns ±9032.98% 292 ns 1416 ns
TailRecursive 2.21 M 453.48 ns ±10881.95% 292 ns 1459 ns
ZipReduce 1.87 M 535.64 ns ±8249.00% 334 ns 1500 ns
RecursiveEnumerate 1.61 M 620.82 ns ±4047.50% 500 ns 1583 ns
Comparison:
TailSinglePass 2.67 M
Recursive 2.51 M - 1.06x slower +24.13 ns
TailRecursive 2.21 M - 1.21x slower +79.38 ns
ZipReduce 1.87 M - 1.43x slower +161.54 ns
RecursiveEnumerate 1.61 M - 1.66x slower +246.72 ns
Memory usage statistics:
Name Memory usage
TailSinglePass 2.15 KB
Recursive 2.15 KB - 1.00x memory usage +0 KB
TailRecursive 2.48 KB - 1.15x memory usage +0.33 KB
ZipReduce 2.52 KB - 1.17x memory usage +0.38 KB
RecursiveEnumerate 2.52 KB - 1.17x memory usage +0.38 KB
**All measurements for memory usage were the same**
@chgeuer
Copy link

chgeuer commented Mar 14, 2024

Very cool...

Mix.install([
  {:benchee, "~> 1.3"}
])

defmodule ConsecutiveSequence do
  defstruct from: nil, to: nil

  defp impl([], a, _) do
    a
    |> Enum.reverse()
    |> Enum.map(fn {from, to} -> %__MODULE__{from: from, to: to} end)
  end

  defp impl([current | tail], [{from, to} | acc], is_next) do
    if is_next.(to, current) do
      impl(tail, [{from, current} | acc], is_next)
    else
      impl(tail, [{current, current}, {from, to} | acc], is_next)
    end
  end

  defp impl([current | tail], [{} | acc], is_next) do
    impl(tail, [{current, current} | acc], is_next)
  end

  def identify(v, is_next) when is_list(v) do
    v
    # |> Enum.sort() |> Enum.dedup()
    |> impl([{}], is_next)
  end
end

defmodule AnotherSolution do
  # Kudos to https://twitter.com/kotovr/status/1768266706240029063
  defstruct from: nil, to: nil

  def identify(v, is_next) when is_list(v) do
    v
    #|> Enum.sort() |> Enum.dedup()
    |> impl(is_next)
  end

  defp impl([], _), do: []
  defp impl([x], _), do: [%__MODULE__{from: x, to: x}]
  defp impl([current | tail], is_next) do
    {range, rest} =
      %__MODULE__{from: current, to: current}
      |> enumerate_sequence(tail, is_next)
    
    [range | impl(rest, is_next)]
  end

  defp enumerate_sequence(range, [], _), do: {range, []}
  defp enumerate_sequence(range, [next | tail], is_next) do
    if is_next.(range.to, next) do
      enumerate_sequence(%__MODULE__{range | to: next}, tail, is_next)
    else
      {range, [next | tail]}
    end
  end
end

defmodule IsNext do
  def integer(previous, next), do: previous + 1 == next
end

list = [
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 
  1999, 2000, 2001, 2010, 2011, 2012, 
  2014, 
  2020, 2021, 2022, 2023, 2024
]


Benchee.run(
  %{
    "tail_recurse" => fn -> ConsecutiveSequence.identify(list, &IsNext.integer/2) end,
    "recursive" => fn -> AnotherSolution.identify(list, &IsNext.integer/2) end
  },
  time: 100,
  memory_time: 2
)

gives

Formatting results...

Name                   ips        average  deviation         median         99th %
recursive           9.93 K      100.71 μs   ±643.52%      102.40 μs        1024 μs
tail_recurse        9.49 K      105.34 μs   ±839.11%      102.40 μs      921.60 μs

Comparison: 
recursive           9.93 K
tail_recurse        9.49 K - 1.05x slower +4.62 μs

@RomanKotov
Copy link
Author

@chgeuer , thank you for the benchmarks!
I have slightly updated tail-recursive function, so it should work better now.

Mix.install([
  {:benchee, "~> 1.3"}
])

defmodule ConsecutiveSequence do
  defstruct from: nil, to: nil
  
  defp reverse([], a), do: a
  defp reverse([{from, to} | tail], a) do
    reverse(tail, [%__MODULE__{from: from, to: to} | a])
  end
  
  defp impl([], a, _), do: reverse(a, [])
  defp impl([current | tail], [{from, to} | acc], is_next) do
    if is_next.(to, current) do
      impl(tail, [{from, current} | acc], is_next)
    else
      impl(tail, [{current, current}, {from, to} | acc], is_next)
    end
  end

  defp impl([current | tail], [{} | acc], is_next) do
    impl(tail, [{current, current} | acc], is_next)
  end

  def identify(v, is_next) when is_list(v) do
    v
    # |> Enum.sort() |> Enum.dedup()
    |> impl([{}], is_next)
  end
end

defmodule AnotherSolution do
  # Kudos to https://twitter.com/kotovr/status/1768266706240029063
  defstruct from: nil, to: nil

  def identify(v, is_next) when is_list(v) do
    v
    #|> Enum.sort() |> Enum.dedup()
    |> impl(is_next)
  end

  defp impl([], _), do: []
  defp impl([x], _), do: [%__MODULE__{from: x, to: x}]
  defp impl([current | tail], is_next) do
    {range, rest} =
      %__MODULE__{from: current, to: current}
      |> enumerate_sequence(tail, is_next)
    
    [range | impl(rest, is_next)]
  end

  defp enumerate_sequence(range, [], _), do: {range, []}
  defp enumerate_sequence(range, [next | tail], is_next) do
    if is_next.(range.to, next) do
      enumerate_sequence(%__MODULE__{range | to: next}, tail, is_next)
    else
      {range, [next | tail]}
    end
  end
end

defmodule IsNext do
  def integer(previous, next), do: previous + 1 == next
end

list = [
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 
  1999, 2000, 2001, 2010, 2011, 2012, 
  2014, 
  2020, 2021, 2022, 2023, 2024
]


Benchee.run(
  %{
    "tail_recurse" => fn -> ConsecutiveSequence.identify(list, &IsNext.integer/2) end,
    "recursive" => fn -> AnotherSolution.identify(list, &IsNext.integer/2) end
  },
  time: 100,
  memory_time: 2
)
Formatting results...

Name                   ips        average  deviation         median         99th %
tail_recurse        3.31 M      301.83 ns ±68443.96%         167 ns         333 ns
recursive           2.19 M      456.78 ns ±21369.61%         375 ns        1500 ns

Comparison: 
tail_recurse        3.31 M
recursive           2.19 M - 1.51x slower +154.94 ns

Memory usage statistics:

Name            Memory usage
tail_recurse         1.13 KB
recursive            1.18 KB - 1.04x memory usage +0.0469 KB

Just removed the main overhead source in tail-recursive case - constructing the result set in reverse order.

a |> Enum.reverse() |> Enum.map(...) constructs 2 new lists, this means possible GC runs and extra operations. Constructed the result and reversed the data in one pass.

Without this optimization the results for both approaches are relatively equal.

Usually tail-recursive approach works fast for most tasks. However, simple recursion may work better if you need to accumulate list and reverse it in the end. Tail-recursive approach will need to create a new, reversed list (it consumes time and may trigger GC), but simple recursion will not need to do it, because it already has a correct list on the stack (immutability helps here very much). It is better to measure the performance for this case. Here is a section in Erlang docs about it.

I wanted to evaluate this. It looks like enumerate_sequence function adds extra performance overhead for simple recursion here. It seems that it is better to use tail-recursive one.

@RomanKotov
Copy link
Author

@chgeuer ,
Have updated extra implementations and updated benchmarks. Possibly you will be able to combine them and pick the best implementation for you.

@chgeuer
Copy link

chgeuer commented Mar 15, 2024

Wow, @RomanKotov, you're on fire, cool. Yeah, my little experiment doesn't need much perf. I like how your seemingly more complex code ist faster. I guess because you don't need the reverse() at the end?

@RomanKotov
Copy link
Author

Hi @chgeuer

Thank you! Have added extra approach and tested it. I wanted to measure how different approaches work for this task. Also wanted to verify if tail-recursive approach is much faster than recursive one.

Originally (in the Twitter post) the algorithm used Enum.uniq/1, (internally uniq_list/3). Recursive algorithm used Enum.dedup/1, (which is single tail-recursive pass + reverse).

Then both tail-recursive and recursive algorithms migrated to Enum.dedup/1, but recursive algorithm was still faster. It iterated through the list only once (I do not count initial calls to Enum.sort/1 and Enum.dedup/1. Tail-recursive algorithm needed 3 passes - create ranges, reverse the result, and wrap the results to a struct.

I removed this overhead in the next iteration, by combining Enum.map/2 and Enum.reverse/1 into a single pass of ConsecutiveSequence.reverse/2 in this comment. Now tail-recursive approach started to work much faster than recursive one. It used only 2 passes instead of 3 (generate ranges and map + reverse).

I looked into different other approaches to speed up the algorithm. Let's see what do they use:

  1. TailRecursive - Slightly optimised version of original post.
  2. TailSinglePass - Tries to do only one tail-recursive pass. It uses Enum.sort/2 function, so the input to impl/3 will be in descending order. This allows us to omit final reverse/2 at all. We already sort the data, so why not to leverage this. I have also removed a call to Enum.dedup/1, because added deduplication to the impl/3 function. It seems this approach is the fastest in most cases (except the case when we have very large number of items, but the distribution is narrow).
  3. RecursiveEnumerate - uses single pass on the list. It finds new range in the impl/2 function. Then it traverses this range, and gathers all items into a single range item via enumerate_sequence/2.
  4. Recursive - Single pass on the list. It uses a head of accumulator to check if impl/3 can extend the range.
  5. ZipReduce - Uses idea to compare a list to its shifted copy with Enum.zip_reduce/4. Needs 2 passes - to build ranges and to reverse them.

Of course there could be more ways to combine ideas from these approaches, to gain more speed, but it is would be more difficult to maintain them in future. The results are pretty similar, especially on large number of items.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment