-
-
Save orlp/053617afe9f01b2c0de7e1b4cc57bdd9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl | |
import numpy as np | |
from timeit import timeit | |
# Fixed seed so every run benchmarks the same synthetic data.
rng = np.random.default_rng(seed=42)
n = 10_000_000

# The draw order (ages, heights, rode_rollercoaster) determines the generated
# values for a given seed, so it must not be rearranged.
# endpoint=True makes the upper bound inclusive: ages are integers in [0, 100].
ages = rng.integers(low=0, high=100, size=n, dtype=np.uint8, endpoint=True)
# Heights are drawn from [1, 2) and stored as float32.
heights = rng.uniform(low=1, high=2, size=n).astype(np.float32)
# Boolean column that is True for roughly half of the rows.
rode_rollercoaster = rng.uniform(low=0, high=1, size=n) < 0.5

# The Polars benchmarks run against this frame; the NumPy benchmarks use the
# arrays above directly.
df = pl.DataFrame(
    {
        "age": ages,
        "height": heights,
        "rode_rollercoaster": rode_rollercoaster,
    }
)
def bench(name, func):
    """Time ``func`` at every age cutoff from 0 to 100 and save the results.

    ``func`` is called with a single integer age cutoff. Each cutoff is timed
    with ``timeit`` using ``number=10``, so the recorded value is the total
    time in seconds for ten calls, not a per-call average. The timings are
    written to ``<name>.csv`` with the columns ``age_cutoff`` and ``time``.
    """
    print(f"benchmarking {name}")
    cutoffs = range(0, 101)
    durations = []
    for cutoff in cutoffs:
        # The lambda is invoked inside this iteration, so it always sees the
        # current cutoff.
        durations.append(timeit(lambda: func(cutoff), number=10))
    results = pl.DataFrame({"age_cutoff": cutoffs, "time": durations})
    results.write_csv(name + ".csv")
# --- Filter workloads: keep only the rows with age < age_cutoff. ---
# The Polars variants take the mean of the filtered column; the NumPy variants
# only build the filtered array.
bench(
    "float-filter-polars-" + pl.__version__,
    lambda age_cutoff: df.select(
        pl.col.height.filter(pl.col.age < age_cutoff).mean()
    ),
)
bench(
    "bool-filter-polars-" + pl.__version__,
    lambda age_cutoff: df.select(
        pl.col.rode_rollercoaster.filter(pl.col.age < age_cutoff).mean()
    ),
)
bench(
    "float-filter-numpy",
    lambda age_cutoff: heights[ages < age_cutoff],
)
bench(
    "bool-filter-numpy",
    lambda age_cutoff: rode_rollercoaster[ages < age_cutoff],
)

# --- When/then workloads: replace values where age < age_cutoff with a
# constant and keep the original value elsewhere. ---
bench(
    "float-whenthen-polars-" + pl.__version__,
    lambda age_cutoff: df.select(
        pl.when(pl.col.age < age_cutoff)
        .then(1.0)
        .otherwise(pl.col.height)
        .mean()
    ),
)
bench(
    "bool-whenthen-polars-" + pl.__version__,
    lambda age_cutoff: df.select(
        pl.when(pl.col.age < age_cutoff)
        .then(False)
        .otherwise(pl.col.rode_rollercoaster)
        .mean()
    ),
)
bench(
    "float-whenthen-numpy",
    lambda age_cutoff: np.where(ages < age_cutoff, 1.0, heights),
)
bench(
    "bool-whenthen-numpy",
    lambda age_cutoff: np.where(ages < age_cutoff, False, rode_rollercoaster),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl | |
import hvplot | |
from pathlib import Path | |
# Polars versions to compare. They are interpolated into the CSV file names
# that are read and into the series labels used in the plots.
old_ver = "0.20.10"
new_ver = "0.20.16"
def join_data(prefix, on):
    """Combine the NumPy and both Polars timing CSVs for one benchmark.

    Reads ``{prefix}-polars-{new_ver}.csv``, ``{prefix}-polars-{old_ver}.csv``
    and ``{prefix}-numpy.csv`` and joins them on the ``on`` column. The result
    keeps the joined ``time``, ``time_old`` and ``time_new`` columns and adds
    copies of them named ``numpy``, ``polars-{old_ver}`` and
    ``polars-{new_ver}``.
    """
    new_timings = pl.read_csv(f"{prefix}-polars-{new_ver}.csv")
    old_timings = pl.read_csv(f"{prefix}-polars-{old_ver}.csv")
    numpy_timings = pl.read_csv(f"{prefix}-numpy.csv")

    # The NumPy frame is the left side, so its "time" column keeps its name
    # while the Polars timings receive the "_old" / "_new" suffixes.
    joined = numpy_timings.join(old_timings, on=on, suffix="_old")
    joined = joined.join(new_timings, on=on, suffix="_new")

    return joined.with_columns(
        pl.col.time.alias("numpy"),
        pl.col.time_old.alias(f"polars-{old_ver}"),
        pl.col.time_new.alias(f"polars-{new_ver}"),
    )
# Render one line plot per benchmark into out/<prefix>-plot.html, comparing
# NumPy against the old and new Polars versions.
Path("out").mkdir(parents=True, exist_ok=True)

# (benchmark prefix, x-axis label) pairs, in the order they are rendered.
# The four plots differ only in these two values, so they are driven from one
# table instead of four copy-pasted stanzas.
plot_specs = [
    ("float-filter", "age cutoff (selectivity %)"),
    ("bool-filter", "age cutoff (selectivity %)"),
    ("float-whenthen", "age cutoff (true %)"),
    ("bool-whenthen", "age cutoff (true %)"),
]

# Column names produced by join_data that are drawn as separate lines.
series = ("numpy", f"polars-{old_ver}", f"polars-{new_ver}")

for prefix, xlabel in plot_specs:
    data = join_data(prefix, "age_cutoff")
    plt = data.plot.line(
        x="age_cutoff",
        y=list(series),  # fresh list per plot, as in the original calls
        xlabel=xlabel,
        ylabel="runtime (s)",
        ylim=(0, None),  # anchor the y-axis at zero so runtimes compare visually
    )
    hvplot.save(plt, f"out/{prefix}-plot.html")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment