Skip to content

Instantly share code, notes, and snippets.

@jonashaag
Created August 1, 2023 14:11
Show Gist options
  • Save jonashaag/4dba5ebf38c39c8a011026ecde2b903a to your computer and use it in GitHub Desktop.
Save jonashaag/4dba5ebf38c39c8a011026ecde2b903a to your computer and use it in GitHub Desktop.
import os
import random
import polars as pl
import timeit
import tqdm
# Benchmark ``Series.is_in`` over a grid of left/right operand sizes and write
# the best per-call timing of every combination to /tmp/<bench_name>.csv.
#
# NOTE(review): indentation was lost in the copy this was restored from; the
# nesting below (three nested loops, CSV written once after all of them) is a
# reconstruction -- confirm against the original script.

NREPEAT = 5  # timeit repetitions per measurement; the fastest one is kept
TARGET_SECONDS = 1e-2  # calibration aims for about this much work per repetition
MIN_NUMBER = 3  # lower bound on executions per repetition
MAX_NUMBER = 100  # upper bound on executions per repetition
STATEMENT = 'lhs.is_in(rhs)'

# Read the label once so that every row and the output file agree on it.
bench_name = os.getenv('bench_name')
if bench_name is None:
    # Output naming is kept as-is (the label formats as "None"); the warning
    # only makes the missing variable visible before the long run starts.
    print(
        "warning: environment variable 'bench_name' is not set; "
        "results will be written to /tmp/None.csv with column 'None_time'"
    )


def _seconds_per_call(namespace, number):
    """Return the best observed time, in seconds, of one STATEMENT execution.

    *namespace* supplies the names used by STATEMENT; *number* is how many
    executions are timed per repetition.
    """
    timings = timeit.repeat(
        STATEMENT, number=number, repeat=NREPEAT, globals=namespace
    )
    return min(timings) / number


def _calibrated_number(single_call_seconds):
    """Pick the executions-per-repetition count from a single-shot timing."""
    if single_call_seconds <= 0:
        # A timer too coarse to resolve one call would otherwise cause a
        # ZeroDivisionError; such a call is as fast as it gets, so use the cap.
        return MAX_NUMBER
    wanted = int(TARGET_SECONDS / single_call_seconds)
    return min(MAX_NUMBER, max(MIN_NUMBER, wanted))


results = []
for shuffle_rhs in [False]:
    # NOTE(review): the first size is 11 while all others are powers of ten;
    # left unchanged -- confirm whether 10 was intended.
    for lhs_size in tqdm.tqdm([11, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000]):
        # The left operand depends only on lhs_size, so build it once per size.
        lhs = pl.Series(range(lhs_size))
        for rhs_size in [1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000]:
            rhs = pl.Series(range(rhs_size))
            if shuffle_rhs:
                rhs = rhs.shuffle()
            # Explicit namespace instead of globals(): the timed statement sees
            # exactly these two operands, wherever this code lives.
            namespace = {"lhs": lhs, "rhs": rhs}
            # One-shot measurement first, used only to size the real run.
            time1 = _seconds_per_call(namespace, 1)
            number = _calibrated_number(time1)
            per_call = _seconds_per_call(namespace, number)
            results.append({
                "shuffled": shuffle_rhs,
                "lhs_size": lhs_size,
                "rhs_size": rhs_size,
                f"{bench_name}_time": per_call,
            })
pl.DataFrame(results).write_csv(f"/tmp/{bench_name}.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment