Skip to content

Instantly share code, notes, and snippets.

@r-brink
Last active February 5, 2024 08:15
Show Gist options
  • Save r-brink/3bbac4ecfe312e143d723dc1c4180025 to your computer and use it in GitHub Desktop.
Save r-brink/3bbac4ecfe312e143d723dc1c4180025 to your computer and use it in GitHub Desktop.
import hvplot
import polars as pl
def plot_combined_csv(
    filter_dataset_file: str, dataset_size: str, title: str, normalized: bool = False
) -> None:
    """Plot filter timings per Polars version as a bar chart and save it as HTML.

    Reads every CSV matching ``output/filter/filter_{dataset_size}_*.csv``,
    converts the ``Mask`` column to a whole-number percentage, and draws
    ``Time`` against ``Mask`` with one bar series per ``Polars_Version``.

    Args:
        filter_dataset_file: Output file stem; the chart is written to
            ``{filter_dataset_file}.html``.
        dataset_size: Dataset key used to build the input CSV glob pattern.
        title: Chart title prefix; " by Polars version" is appended.
        normalized: Accepted for interface compatibility; not used by this
            function.
    """
    df = pl.read_csv(f"output/filter/filter_{dataset_size}_*.csv").with_columns(
        # Round before casting: a bare integer cast truncates, so a product
        # such as 0.29 * 100 == 28.999999999999996 would be labelled 28.
        # The Float64 cast keeps `round` valid even if the CSV column was
        # inferred as an integer type.
        (pl.col("Mask").cast(pl.Float64) * 100).round(0).cast(pl.Int64)
    )

    plot = df.plot.bar(
        x="Mask",
        y="Time",
        by=["Polars_Version"],
        color=["#73bfb8", "#0075FF"],
        xlabel="Selectivity [%]",
        ylabel="Time [s]",
        title=f"{title} by Polars version",
    )
    # Fix the set and order of the version dimension values for the chart.
    plot = plot.redim.values(Polars_Version=["0.20.6", "0.20.5"])

    hvplot.save(plot, filename=f"{filter_dataset_file}.html")
# Render one chart per benchmark dataset.
# Each entry is (output file stem, dataset size key, chart title).
for output_stem, size_key, chart_title in (
    ("filter_small16", "small16", "Filter on 16x1M rows with <= 12 byte strings"),
    (
        "filter_medium16",
        "medium16",
        "Filter on 16x1M rows between 1 and 200 byte strings",
    ),
    ("filter_large16", "large16", "Filter on 16x1M rows with 500 byte strings"),
):
    plot_combined_csv(output_stem, size_key, chart_title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment