Skip to content

Instantly share code, notes, and snippets.

@ethanniser
Last active March 18, 2024 14:54
Show Gist options
  • Save ethanniser/3eab582d9286e3ba8adba40b6053b481 to your computer and use it in GitHub Desktop.
Save ethanniser/3eab582d9286e3ba8adba40b6053b481 to your computer and use it in GitHub Desktop.
1 billion row challenge in a SINGLE EXPRESSION WITH ELIXIR!!
Mix.install([
{:explorer, "~> 0.8.0"}
])
# Path to the CSV file of measurements that is handed to `Challenge.run/1`.
# Change it to point at your own data file.
filepath = "./data/weather_stations.csv"
defmodule Challenge do
  @moduledoc """
  One Billion Row Challenge solved as a single Explorer pipeline.

  The input is a headerless, `;`-delimited CSV whose first column is read as
  a categorical station name and whose second column is read as a 32-bit
  float measurement.
  """

  import Explorer.Series
  require Explorer.DataFrame, as: DF

  @doc """
  Aggregates the measurements in the CSV at `filepath` and prints the result.

  For every station the minimum, mean and maximum measurement are computed,
  rounded to one decimal, sorted by station and written to standard output as
  `station=min/mean/max` entries joined by `", "`.

  Returns the result of `IO.puts/1` (`:ok`). Raises if the file cannot be
  loaded, because the bang variant `from_csv!` is used.
  """
  def run(filepath) do
    DF.from_csv!(
      filepath,
      delimiter: ";",
      header: false,
      eol_delimiter: "\n",
      # Build a lazy frame; nothing is materialised until `DF.collect/1` below.
      lazy: true,
      dtypes: [column_1: :category, column_2: {:f, 32}]
    )
    |> DF.rename(["station", "measurement"])
    |> DF.group_by("station")
    |> DF.summarise(min: min(measurement), mean: mean(measurement), max: max(measurement))
    |> DF.mutate(min: round(min, 1), mean: round(mean, 1), max: round(max, 1))
    |> DF.sort_by(station)
    |> DF.collect()
    |> DF.to_rows_stream(atom_keys: true)
    |> Enum.map_join(", ", &format_row/1)
    |> IO.puts()
  end

  # Renders one aggregated row as "station=min/mean/max".
  defp format_row(%{station: station, min: low, mean: avg, max: high}) do
    "#{station}=#{format_value(low)}/#{format_value(avg)}/#{format_value(high)}"
  end

  # Prints floats with exactly one decimal. The aggregates come from a 32-bit
  # float column; NOTE(review): if they surface as 64-bit Elixir floats with
  # widening noise (e.g. 12.300000190734863), plain interpolation would leak
  # those digits into the output, so the precision is pinned here.
  defp format_value(value) when is_float(value) do
    :erlang.float_to_binary(value, decimals: 1)
  end

  # Anything that is not a float is interpolated unchanged, as before.
  defp format_value(value), do: "#{value}"
end
# Script entry point: run the aggregation on the configured file and print it.
Challenge.run(filepath)
@ryanwinchester
Copy link

<3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment