Skip to content

Instantly share code, notes, and snippets.

@ndrezn
Last active October 21, 2024 21:57
Show Gist options
  • Save ndrezn/3162c1f01b72b5d168cff30eabfdb692 to your computer and use it in GitHub Desktop.
Save ndrezn/3162c1f01b72b5d168cff30eabfdb692 to your computer and use it in GitHub Desktop.
Narwhals Plotly.py Performance Test
import polars as pl
import pandas as pd
import numpy as np
import plotly.express as px
import time
from functools import wraps
# Decorator factory: time a function over several runs and report the mean
def timeit(repeat=1):
    """Build a decorator that benchmarks the function it wraps.

    The decorated function's own return value is discarded; calling the
    decorated function instead returns the mean duration of one run.

    Args:
        repeat: Number of times the wrapped function is executed per call.
            Must be at least 1.

    Returns:
        A decorator whose wrapper returns the average elapsed time, in
        seconds, across ``repeat`` runs.

    Raises:
        ValueError: If ``repeat`` is smaller than 1 (the average would be
            undefined).
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            total_time = 0.0
            for _ in range(repeat):
                # perf_counter is monotonic and high-resolution, so the
                # measurement is not distorted by system clock adjustments.
                start_time = time.perf_counter()
                func(*args, **kwargs)
                total_time += time.perf_counter() - start_time
            return total_time / repeat

        return wrapper

    return decorator
# Generate a large Polars DataFrame with additional columns for color and facet
def generate_large_polars_df(num_rows=1000000, seed=42):
    """Create a reproducible random dataset as a Polars DataFrame.

    A private ``RandomState`` is used instead of ``np.random.seed`` so the
    process-wide NumPy random state is left untouched. For a given seed the
    legacy ``RandomState`` produces the same stream as seeding the global
    generator, so the generated values are unchanged.

    Args:
        num_rows: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        A ``pl.DataFrame`` with columns ``x`` and ``y`` (uniform floats in
        [0, 100)), ``category`` ("A"/"B"/"C"), ``colorby``
        ("Group 1"/"Group 2") and ``facetby`` ("Region 1"/"Region 2").
    """
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: x, y, category, colorby, facetby.
    data = {
        "x": rng.uniform(0, 100, num_rows),
        "y": rng.uniform(0, 100, num_rows),
        "category": rng.choice(["A", "B", "C"], num_rows),  # Original category
        "colorby": rng.choice(["Group 1", "Group 2"], num_rows),  # Color by group
        "facetby": rng.choice(["Region 1", "Region 2"], num_rows),  # Facet by region
    }
    return pl.DataFrame(data)
# Generate the same dataset as a Pandas DataFrame
def generate_large_pandas_df(num_rows=1000000, seed=42):
    """Create a reproducible random dataset as a Pandas DataFrame.

    A private ``RandomState`` is used instead of ``np.random.seed`` so the
    process-wide NumPy random state is left untouched. For a given seed the
    legacy ``RandomState`` produces the same stream as seeding the global
    generator, so the generated values are unchanged.

    Args:
        num_rows: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        A ``pd.DataFrame`` with columns ``x`` and ``y`` (uniform floats in
        [0, 100)), ``category`` ("A"/"B"/"C"), ``colorby``
        ("Group 1"/"Group 2") and ``facetby`` ("Region 1"/"Region 2").
    """
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: x, y, category, colorby, facetby.
    data = {
        "x": rng.uniform(0, 100, num_rows),
        "y": rng.uniform(0, 100, num_rows),
        "category": rng.choice(["A", "B", "C"], num_rows),  # Original category
        "colorby": rng.choice(["Group 1", "Group 2"], num_rows),  # Color by group
        "facetby": rng.choice(["Region 1", "Region 2"], num_rows),  # Facet by region
    }
    return pd.DataFrame(data)
# Build both benchmark datasets once at module load; the timed plotting
# functions below read these module-level frames on every call.
pandas_df, polars_df = generate_large_pandas_df(), generate_large_polars_df()
# Scatter plot built from the Polars DataFrame
@timeit(repeat=5)
def figure_generation_scatter_polars():
    """Build a colored, faceted scatter figure from the module-level ``polars_df``.

    The figure itself is thrown away; because of the ``timeit`` decorator,
    calling this function runs the body five times and returns the average
    duration in seconds.
    """
    plot_options = {
        "x": "x",
        "y": "y",
        "color": "colorby",  # one color per 'colorby' value
        "facet_col": "facetby",  # one facet column per 'facetby' value
        "title": "Scatter Plot with Color and Facet (Polars)",
    }
    px.scatter(polars_df, **plot_options)
# Scatter plot built from the Pandas DataFrame
@timeit(repeat=5)
def figure_generation_scatter_pandas():
    """Build a colored, faceted scatter figure from the module-level ``pandas_df``.

    The figure itself is thrown away; because of the ``timeit`` decorator,
    calling this function runs the body five times and returns the average
    duration in seconds.
    """
    plot_options = {
        "x": "x",
        "y": "y",
        "color": "colorby",  # one color per 'colorby' value
        "facet_col": "facetby",  # one facet column per 'facetby' value
        "title": "Scatter Plot with Color and Facet (Pandas)",
    }
    px.scatter(pandas_df, **plot_options)
# Bar plot built from the Polars DataFrame
@timeit(repeat=5)
def figure_generation_bar_polars():
    """Build a colored, faceted bar figure from the module-level ``polars_df``.

    The figure itself is thrown away; because of the ``timeit`` decorator,
    calling this function runs the body five times and returns the average
    duration in seconds.
    """
    plot_options = {
        "x": "category",
        "y": "y",
        "color": "colorby",  # one color per 'colorby' value
        "facet_col": "facetby",  # one facet column per 'facetby' value
        "title": "Bar Plot with Color and Facet (Polars)",
    }
    px.bar(polars_df, **plot_options)
# Bar plot built from the Pandas DataFrame
@timeit(repeat=5)
def figure_generation_bar_pandas():
    """Build a colored, faceted bar figure from the module-level ``pandas_df``.

    The figure itself is thrown away; because of the ``timeit`` decorator,
    calling this function runs the body five times and returns the average
    duration in seconds.
    """
    plot_options = {
        "x": "category",
        "y": "y",
        "color": "colorby",  # one color per 'colorby' value
        "facet_col": "facetby",  # one facet column per 'facetby' value
        "title": "Bar Plot with Color and Facet (Pandas)",
    }
    px.bar(pandas_df, **plot_options)
# Run every chart benchmark and collect the timings for Polars and Pandas
def test_all_charts():
    """Execute all four timed figure builders and return their results.

    Returns:
        A dict mapping ``"scatter_polars"``, ``"bar_polars"``,
        ``"scatter_pandas"`` and ``"bar_pandas"`` to the value returned by
        the corresponding timed function. The Polars benchmarks run first,
        then the Pandas ones.
    """
    # Dict displays evaluate their entries top to bottom, which preserves the
    # Polars-then-Pandas execution order.
    return {
        "scatter_polars": figure_generation_scatter_polars(),
        "bar_polars": figure_generation_bar_polars(),
        "scatter_pandas": figure_generation_scatter_pandas(),
        "bar_pandas": figure_generation_bar_pandas(),
    }
# Run the benchmarks and persist the timings as a CSV file
def run_and_save_results(csv_filename):
    """Run all chart benchmarks and write the timings to ``csv_filename``.

    The CSV has two columns, ``Chart Type`` and ``Time (s)``, with one row
    per benchmark. A confirmation line is printed once the file is written.

    Args:
        csv_filename: Destination path passed to Polars' ``write_csv``.
    """
    timings = test_all_charts()
    chart_types = list(timings)
    seconds = [timings[name] for name in chart_types]
    # Polars is used only as a convenient CSV writer here.
    table = pl.DataFrame({"Chart Type": chart_types, "Time (s)": seconds})
    table.write_csv(csv_filename)
    print(f"Results saved to {csv_filename}")
# Entry point for running the tests
if __name__ == "__main__":
    import sys

    # Exactly one command-line argument is required: the output CSV path.
    if len(sys.argv) != 2:
        # Passing a string to sys.exit writes it to stderr and exits with
        # status 1, so a misuse is reported as a failure to the caller
        # instead of printing to stdout and exiting successfully.
        sys.exit("Usage: python performance_test.py <csv_filename>")
    run_and_save_results(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment