-
-
Save ndrezn/3162c1f01b72b5d168cff30eabfdb692 to your computer and use it in GitHub Desktop.
Narwhals Plotly.py Performance Test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import polars as pl | |
import pandas as pd | |
import numpy as np | |
import plotly.express as px | |
import time | |
from functools import wraps | |
# Decorator to time a function multiple times and return the average time
def timeit(repeat=1):
    """Build a decorator that turns a function into a timer for itself.

    The decorated callable runs the wrapped function ``repeat`` times with the
    arguments it receives and returns the mean duration of one run in seconds.
    The wrapped function's own return value is discarded.

    Args:
        repeat: Number of timed runs to average over; must be at least 1.

    Returns:
        A decorator whose wrapper returns the average duration as a float.

    Raises:
        ValueError: If ``repeat`` is less than 1 (an average over zero runs
            is undefined).
    """
    if repeat < 1:
        raise ValueError(f"repeat must be at least 1, got {repeat!r}")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            total_time = 0.0
            for _ in range(repeat):
                # perf_counter is monotonic and high-resolution, so the
                # measurement cannot be skewed by system clock adjustments
                # the way time.time() can.
                start_time = time.perf_counter()
                func(*args, **kwargs)
                total_time += time.perf_counter() - start_time
            return total_time / repeat  # Average time for comparison

        return wrapper

    return decorator
# Column data shared by the Polars and Pandas generators
def _generate_columns(num_rows, seed=42):
    """Draw the benchmark columns from a private, seeded random generator.

    A local ``RandomState`` is used instead of ``np.random.seed`` so that the
    process-wide NumPy random state is left untouched. The draws happen in
    the order x, y, category, colorby, facetby.

    Args:
        num_rows: Number of values to draw for each column.
        seed: Seed for the private generator.

    Returns:
        Dict mapping column name to a NumPy array of length ``num_rows``.
    """
    rng = np.random.RandomState(seed)
    return {
        "x": rng.uniform(0, 100, num_rows),
        "y": rng.uniform(0, 100, num_rows),
        "category": rng.choice(["A", "B", "C"], num_rows),  # Original category
        "colorby": rng.choice(["Group 1", "Group 2"], num_rows),  # Color by group
        "facetby": rng.choice(["Region 1", "Region 2"], num_rows),  # Facet by region
    }


# Generate a large Polars DataFrame with additional columns for color and facet
def generate_large_polars_df(num_rows=1000000, seed=42):
    """Return the seeded benchmark dataset as a Polars DataFrame.

    Args:
        num_rows: Number of rows to generate.
        seed: Seed for the random draws; the same seed and row count give
            the same columns as ``generate_large_pandas_df``.

    Returns:
        A ``pl.DataFrame`` with columns x, y, category, colorby, facetby.
    """
    return pl.DataFrame(_generate_columns(num_rows, seed))


# Generate the same dataset as a Pandas DataFrame
def generate_large_pandas_df(num_rows=1000000, seed=42):
    """Return the seeded benchmark dataset as a Pandas DataFrame.

    Args:
        num_rows: Number of rows to generate.
        seed: Seed for the random draws; the same seed and row count give
            the same columns as ``generate_large_polars_df``.

    Returns:
        A ``pd.DataFrame`` with columns x, y, category, colorby, facetby.
    """
    return pd.DataFrame(_generate_columns(num_rows, seed))
# Module-level datasets read by the figure_generation_* functions below.
# They are built once here, outside of any timed function.
polars_df = generate_large_polars_df()
pandas_df = generate_large_pandas_df()
# Scatter plot using Polars DataFrame
@timeit(repeat=5)
def figure_generation_scatter_polars():
    """Build a scatter figure from the module-level ``polars_df``.

    Points are colored by the 'colorby' column and split into facet columns
    by the 'facetby' column. The figure itself is not kept; the ``timeit``
    decorator reports the average build time instead.
    """
    plot_options = {
        "x": "x",
        "y": "y",
        "color": "colorby",
        "facet_col": "facetby",
        "title": "Scatter Plot with Color and Facet (Polars)",
    }
    px.scatter(polars_df, **plot_options)
# Scatter plot using Pandas DataFrame
@timeit(repeat=5)
def figure_generation_scatter_pandas():
    """Build a scatter figure from the module-level ``pandas_df``.

    Points are colored by the 'colorby' column and split into facet columns
    by the 'facetby' column. The figure itself is not kept; the ``timeit``
    decorator reports the average build time instead.
    """
    plot_options = {
        "x": "x",
        "y": "y",
        "color": "colorby",
        "facet_col": "facetby",
        "title": "Scatter Plot with Color and Facet (Pandas)",
    }
    px.scatter(pandas_df, **plot_options)
# Bar plot using Polars DataFrame
@timeit(repeat=5)
def figure_generation_bar_polars():
    """Build a bar figure from the module-level ``polars_df``.

    Bars use 'category' on the x axis and 'y' on the y axis, are colored by
    the 'colorby' column and split into facet columns by the 'facetby'
    column. The figure itself is not kept; the ``timeit`` decorator reports
    the average build time instead.
    """
    plot_options = {
        "x": "category",
        "y": "y",
        "color": "colorby",
        "facet_col": "facetby",
        "title": "Bar Plot with Color and Facet (Polars)",
    }
    px.bar(polars_df, **plot_options)
# Bar plot using Pandas DataFrame
@timeit(repeat=5)
def figure_generation_bar_pandas():
    """Build a bar figure from the module-level ``pandas_df``.

    Bars use 'category' on the x axis and 'y' on the y axis, are colored by
    the 'colorby' column and split into facet columns by the 'facetby'
    column. The figure itself is not kept; the ``timeit`` decorator reports
    the average build time instead.
    """
    plot_options = {
        "x": "category",
        "y": "y",
        "color": "colorby",
        "facet_col": "facetby",
        "title": "Bar Plot with Color and Facet (Pandas)",
    }
    px.bar(pandas_df, **plot_options)
# Function to test all charts and gather the times for both Polars and Pandas
def test_all_charts():
    """Run every timed figure builder and collect what each one returns.

    The builders run in the order listed: Polars scatter and bar first, then
    Pandas scatter and bar.

    Returns:
        Dict mapping a "<chart>_<backend>" label to the value returned by the
        matching ``figure_generation_*`` callable.
    """
    benchmarks = (
        # Polars performance
        ("scatter_polars", figure_generation_scatter_polars),
        ("bar_polars", figure_generation_bar_polars),
        # Pandas performance
        ("scatter_pandas", figure_generation_scatter_pandas),
        ("bar_pandas", figure_generation_bar_pandas),
    )
    return {label: benchmark() for label, benchmark in benchmarks}
# Run the performance tests and save results to CSV
def run_and_save_results(csv_filename):
    """Run all chart benchmarks and write their results to a CSV file.

    The CSV has one row per benchmark with the columns "Chart Type" and
    "Time (s)". A confirmation line is printed once the file is written.

    Args:
        csv_filename: Destination passed to Polars' ``write_csv``.
    """
    timings = test_all_charts()
    chart_types = list(timings)
    seconds = list(timings.values())
    # Save results to CSV using Polars
    pl.DataFrame({"Chart Type": chart_types, "Time (s)": seconds}).write_csv(
        csv_filename
    )
    print(f"Results saved to {csv_filename}")
# Entry point for running the tests
if __name__ == "__main__":
    import sys

    # Expect exactly one argument: the CSV file to write the results to.
    if len(sys.argv) != 2:
        # sys.exit with a string writes it to stderr and exits with status 1,
        # so shells and CI see the misuse as a failure instead of a success.
        sys.exit("Usage: python performance_test.py <csv_filename>")
    run_and_save_results(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment