Skip to content

Instantly share code, notes, and snippets.

@r-brink
Last active February 5, 2024 08:15
Show Gist options
  • Save r-brink/c46e94c423c148b2bb8f4ebb9c013a14 to your computer and use it in GitHub Desktop.
Save r-brink/c46e94c423c148b2bb8f4ebb9c013a14 to your computer and use it in GitHub Desktop.
from datetime import date
from pathlib import Path
from timeit import timeit

import pandas as pd
import polars as pl
# Path of the Excel workbook used by the benchmark functions in this script.
xl = "test.xlsx"

# Benchmark fixture: one integer range column plus constant float, date and
# string values, so the workbook mixes several cell types.
df = pl.DataFrame(
    {
        "v": range(1_000_000),
        "w": 999.999,
        "x": -42.0,
        "y": date.today(),
        "z": "acbdefghijlmnop",
    }
)

# Write the fixture to disk once, up front; the return value is kept in `wb`.
wb = df.write_excel(xl)
def pandas_read_excel() -> None:
    """Read the workbook at the module-level path ``xl`` with pandas.

    The resulting frame is discarded; the function exists only so the read
    can be timed.
    """
    pd.read_excel(xl)
def polars_xlsx2csv() -> None:
    """Read the workbook at the module-level path ``xl`` with polars.

    Uses the ``xlsx2csv`` engine. The resulting frame is discarded; the
    function exists only so the read can be timed.
    """
    pl.read_excel(xl, engine="xlsx2csv")
def polars_openpyxl() -> None:
    """Read the workbook at the module-level path ``xl`` with polars.

    Uses the ``openpyxl`` engine. The resulting frame is discarded; the
    function exists only so the read can be timed.
    """
    pl.read_excel(xl, engine="openpyxl")
def polars_calamine() -> None:
    """Read the workbook at the module-level path ``xl`` with polars.

    Uses the ``calamine`` engine. The resulting frame is discarded; the
    function exists only so the read can be timed.
    """
    pl.read_excel(xl, engine="calamine")
def read_excel_performance(csv_filename):
    """Time each Excel reader once and write the timings to a CSV file.

    Every reader function is executed a single time through ``timeit`` and
    the elapsed time is stored next to a label for the engine, under the
    keys ``Engine`` and ``Time``. The collected rows are written to
    ``output/<csv_filename>-asdfkjsahfkjash.csv``.

    Args:
        csv_filename: Stem used to build the name of the output CSV file.
    """
    # The output file name pattern is kept exactly as it was so the file
    # lands at the same location as before.
    out_path = Path(f"output/{csv_filename}-asdfkjsahfkjash.csv")

    # Make sure the target directory exists *before* the benchmarks run:
    # otherwise a missing directory would only surface when writing the CSV,
    # after all the timing work has already been done and would be lost.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    pandas_version = pd.__version__
    results = [
        {
            "Engine": f"Pandas {pandas_version}",
            "Time": timeit(pandas_read_excel, number=1),
        },
        {"Engine": "xlsx2csv", "Time": timeit(polars_xlsx2csv, number=1)},
        {"Engine": "openpyxl", "Time": timeit(polars_openpyxl, number=1)},
        {"Engine": "calamine (new)", "Time": timeit(polars_calamine, number=1)},
    ]

    # Write results to CSV
    pl.DataFrame(results).write_csv(out_path)
# Run the benchmark; "read_excel_results" is the stem of the output CSV name.
read_excel_performance(csv_filename="read_excel_results")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment