Skip to content

Instantly share code, notes, and snippets.

@ryanwinchester
Forked from ethanniser/1brc.exs
Last active March 18, 2024 15:11
Show Gist options
  • Save ryanwinchester/90620bece9f046561136de9615707bf9 to your computer and use it in GitHub Desktop.
Save ryanwinchester/90620bece9f046561136de9615707bf9 to your computer and use it in GitHub Desktop.
1 billion row challenge in a SINGLE EXPRESSION!
# Fetch and compile the script's only dependency before anything else runs.
Mix.install([
  {:explorer, "~> 0.8.0"}
])

# Path of the measurements file: the first command-line argument when one is
# given (e.g. `elixir 1brc.exs /data/measurements.txt`), otherwise the default
# file in the current directory.
filepath = List.first(System.argv()) || "./measurements.txt"
defmodule Challenge do
  @moduledoc """
  One Billion Row Challenge implemented as a single lazy Explorer pipeline.
  """

  import Explorer.Series
  require Explorer.DataFrame, as: DF

  @doc """
  Aggregates the measurements file at `filepath` and returns the formatted result.

  The file is read as a header-less, `;`-delimited CSV whose first column is the
  station name and whose second column is the measurement. For every station the
  minimum, mean and maximum measurement are computed and rounded to one decimal.

  Returns a single string of `station=min/mean/max` entries joined by `", "`,
  ordered by station name.

  Raises if the file cannot be read (`DF.from_csv!/2`).
  """
  @spec run(Path.t()) :: String.t()
  def run(filepath) do
    DF.from_csv!(
      filepath,
      delimiter: ";",
      header: false,
      eol_delimiter: "\n",
      lazy: true,
      dtypes: [column_1: :category, column_2: {:f, 32}]
    )
    |> DF.rename(["station", "measurement"])
    |> DF.group_by("station")
    |> DF.summarise(min: min(measurement), mean: mean(measurement), max: max(measurement))
    |> DF.mutate(min: round(min, 1), mean: round(mean, 1), max: round(max, 1))
    # Sort on the string value: a category column may otherwise be ordered by its
    # internal category codes (order of first appearance) instead of alphabetically.
    |> DF.sort_by(cast(station, :string))
    |> DF.collect()
    |> DF.to_rows()
    |> Enum.map_join(", ", &format_row/1)
  end

  # Renders one aggregated row as "station=min/mean/max".
  defp format_row(%{"station" => station, "min" => min, "mean" => mean, "max" => max}) do
    "#{station}=#{format_temp(min)}/#{format_temp(mean)}/#{format_temp(max)}"
  end

  # Formats a float with exactly one decimal so that 32-bit values widened to
  # Elixir floats do not print as e.g. "18.100000381469727".
  defp format_temp(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 1)

  # Non-float values (e.g. nil or non-finite markers) keep their default rendering.
  defp format_temp(value), do: to_string(value)
end
# Load the module up front (presumably so module loading is not part of the
# measured time).
Code.ensure_loaded(Challenge)

# Run the challenge once; :timer.tc returns {elapsed_microseconds, return_value}.
{elapsed_us, output} = :timer.tc(Challenge, :run, [filepath])

# Print the aggregated stations, followed by the elapsed time in seconds.
IO.puts(output)
IO.puts("Time: #{elapsed_us / 1_000_000}s")
@ryanwinchester
Copy link
Author

ryanwinchester commented Mar 18, 2024

$ elixir 1brc.exs
Time: 6.355899s

@ryanwinchester
Copy link
Author

Time: 6.27903s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment