Last active
April 9, 2024 05:22
-
-
Save Butch78/702944427d78da6727a277e1f54d65c8 to your computer and use it in GitHub Desktop.
1BRC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package]
name = "rust_1brc"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
polars = { version = "0.36.2", features = ["csv", "lazy", "nightly", "streaming"]}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use polars::prelude::*; | |
use std::time::Instant; | |
use std::vec; | |
fn run_polars() -> Result<DataFrame, PolarsError> { | |
let now = Instant::now(); | |
let f1: Field = Field::new("station", DataType::String); | |
let f2: Field = Field::new("measure", DataType::Float64); | |
let sc: Schema = Schema::from_iter(vec![f1, f2]); | |
let q = LazyCsvReader::new("../measurements.txt") | |
.has_header(false) | |
.with_schema(Some(Arc::new(sc))) | |
.with_separator(b';') | |
.finish()? | |
.group_by(vec![col("station")]) | |
.agg(vec![ | |
col("measure").alias("min").min(), | |
col("measure").alias("mean").mean(), | |
col("measure").alias("max").max(), | |
]) | |
.sort("station", Default::default()) | |
.with_streaming(true); | |
let df = q.collect()?; | |
println!("Time={} μs", now.elapsed().as_micros()); | |
Ok(df) | |
} | |
fn main() { | |
run_polars(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment