Skip to content

Instantly share code, notes, and snippets.

@Butch78
Last active April 9, 2024 05:22
Show Gist options
  • Save Butch78/702944427d78da6727a277e1f54d65c8 to your computer and use it in GitHub Desktop.
Save Butch78/702944427d78da6727a277e1f54d65c8 to your computer and use it in GitHub Desktop.
1BRC
# Cargo manifest for the `rust_1brc` crate.
[package]
name = "rust_1brc"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# The program reads a CSV file through the lazy API and enables streaming on the query,
# hence the "csv", "lazy" and "streaming" features.
# NOTE(review): "nightly" presumably requires a nightly Rust toolchain — confirm before building on stable.
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
use polars::prelude::*;
use std::time::Instant;
use std::vec;
/// Aggregates `../measurements.txt` per station and reports how long it took.
///
/// The input is read lazily as a header-less, `;`-separated file with two
/// columns: `station` (string) and `measure` (64-bit float). Rows are grouped
/// by `station`, the minimum, mean and maximum of `measure` are computed under
/// the aliases `min`, `mean` and `max`, and the result is sorted by `station`
/// using the default sort options. Streaming is enabled on the query before it
/// is collected.
///
/// The elapsed time, measured from function entry until the query has been
/// collected, is printed to stdout in microseconds.
///
/// # Errors
///
/// Returns the `PolarsError` produced while finishing the CSV reader or while
/// collecting the query.
fn run_polars() -> Result<DataFrame, PolarsError> {
    let started = Instant::now();

    // Explicit schema: the file has no header row to take names or types from.
    let schema = Schema::from_iter([
        Field::new("station", DataType::String),
        Field::new("measure", DataType::Float64),
    ]);

    let aggregations = [
        col("measure").alias("min").min(),
        col("measure").alias("mean").mean(),
        col("measure").alias("max").max(),
    ];

    let plan = LazyCsvReader::new("../measurements.txt")
        .has_header(false)
        .with_schema(Some(Arc::new(schema)))
        .with_separator(b';')
        .finish()?
        .group_by([col("station")])
        .agg(aggregations)
        .sort("station", Default::default())
        .with_streaming(true);

    let result = plan.collect()?;
    println!("Time={} μs", started.elapsed().as_micros());
    Ok(result)
}
/// Program entry point: runs the aggregation once.
///
/// The resulting `DataFrame` is discarded; only the timing line printed by
/// `run_polars` is shown. If the run fails, the error is written to stderr and
/// the process exits with status 1 instead of silently succeeding.
fn main() {
    if let Err(err) = run_polars() {
        eprintln!("run_polars failed: {err}");
        std::process::exit(1);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment