Skip to content

Instantly share code, notes, and snippets.

@deanm0000
Last active March 20, 2024 15:16
Show Gist options
  • Save deanm0000/757c2b823746ea76e5ae349126273c40 to your computer and use it in GitHub Desktop.
Save deanm0000/757c2b823746ea76e5ae349126273c40 to your computer and use it in GitHub Desktop.
benchmark filtering list in polars series
import polars as pl
import numpy as np
from itertools import product
import time
from datetime import datetime
import json
def gen_long_string(str_len=10, n_rows=10_000_000):
    """Return ``n_rows`` random lowercase ASCII strings of length ``str_len``.

    Random code points are drawn as ``uint32`` values and the resulting buffer
    is reinterpreted as fixed-width unicode (``U{str_len}``), so the strings
    are produced without any Python-level string building.

    Args:
        str_len: Number of characters in every generated string.
        n_rows: Number of strings to generate.

    Returns:
        A 1-D NumPy array with ``n_rows`` entries of dtype ``U{str_len}``.
    """
    rng = np.random.default_rng()
    # ``high`` is exclusive, so this draws exactly "a".."z".  The previous
    # bounds (96, 122) could emit a backtick and could never produce "z".
    code_points = rng.integers(
        low=ord("a"),
        high=ord("z") + 1,
        size=n_rows * str_len,
        dtype="uint32",
    )
    # Each group of ``str_len`` consecutive uint32 values becomes one string.
    return code_points.view(f"U{str_len}")
def run_benches(n, j):
    """Time several ways of removing one string from every list in a Series.

    A frame with ``n * j`` random rows (key ``i`` in ``[0, n)``, 5-character
    random string ``a``) is concatenated with one extra row per key whose
    ``a`` is a 6-character sentinel.  Because the sentinel is longer than the
    random strings, it cannot collide with them.  After shuffling, keys with
    two or fewer rows are discarded and the rest are aggregated into one list
    per key; every strategy then removes the sentinel from those lists.

    Strategies timed: ``list.eval`` + filter, ``map_elements`` with a Python
    list comprehension (only for ``n <= 1_000_000``), ``list.gather``,
    explode/filter/re-aggregate, and ``list.set_difference``.

    The timings (plus ``n``, ``j`` and, if any two strategies disagreed, the
    last disagreeing pair under ``"inconsistent"``) are written as JSON to
    ``./benchmarks/n{n}_j{j}.json``.

    Args:
        n: Number of distinct keys (and therefore of lists before filtering).
        j: Multiplier for the number of random rows; ``n * j`` are generated.
    """
    import os  # local import: only needed to create the output directory

    def _timed(compute):
        """Run ``compute`` once; return its answer and elapsed seconds."""
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which matters for the sub-millisecond cases at small n.
        start = time.perf_counter()
        answer = compute()
        return {"answer": answer, "timing": time.perf_counter() - start}

    print(f"{datetime.now()}: n={n}, j={j}")

    # Sentinel to remove: 6 chars, while the random payload below is 5 chars.
    remove_str = gen_long_string(6, 1)[0]
    df = (
        pl.concat(
            [
                pl.DataFrame(
                    {
                        "i": np.random.randint(0, n, n * j),
                        "a": gen_long_string(5, n * j),
                    }
                ),
                # Exactly one sentinel row for every key 0..n-1.
                pl.select(i=pl.arange(n), a=pl.lit(remove_str)),
            ]
        )
        .sample(n * (j + 1), shuffle=True)
        .filter(pl.len().over("i") > 2)
    )
    s = df.group_by("i").agg("a").get_column("a")

    result = {}

    result["list_eval"] = _timed(
        lambda: s.list.eval(pl.element().filter(pl.element() != remove_str))
    )

    # The Python-level comprehension is skipped for the largest inputs.
    if n <= 1_000_000:
        result["map_elem"] = _timed(
            lambda: s.map_elements(lambda x: [y for y in x if y != remove_str])
        )

    result["gather"] = _timed(
        lambda: (
            pl.select(a=s)
            .with_row_index("i")
            .group_by("i")
            .agg(
                pl.col("a")
                .list.gather(pl.arg_where(pl.col("a").explode() != remove_str))
                .first()
            )
            .get_column("a")
        )
    )

    result["filter"] = _timed(
        lambda: (
            pl.select(a=s)
            .with_row_index("i")
            .explode("a")
            .filter(pl.col("a") != remove_str)
            .group_by("i")
            .agg("a")
            .get_column("a")
        )
    )

    result["set_diff"] = _timed(lambda: s.list.set_difference([remove_str]))

    # Compare every pair of answers; only the last mismatching pair is kept.
    # NOTE(review): the group_by-based strategies do not request
    # maintain_order, and set_difference is a set operation, so a mismatch
    # here may reflect ordering/deduplication rather than a wrong answer -
    # confirm before relying on this flag.
    res_keys = list(result.keys())
    for i, key in enumerate(res_keys):
        for jkey in res_keys[i + 1 :]:
            if not result[key]["answer"].equals(result[jkey]["answer"]):
                result["inconsistent"] = (key, jkey)

    # The answers are Series and not JSON-serialisable; keep only the timings.
    for key in res_keys:
        del result[key]["answer"]
    result["n"] = n
    result["j"] = j

    # Create the output directory up front so a fresh checkout does not fail
    # with FileNotFoundError after all the work above has been done.
    os.makedirs("./benchmarks", exist_ok=True)
    with open(f"./benchmarks/n{n}_j{j}.json", "w") as ff:
        json.dump(result, ff)
def main():
    """Run ``run_benches`` over the whole (n, j) sweep.

    ``n`` takes the powers of ten from 10 to 100_000, then 200_000 to
    1_000_000 in steps of 100_000, then 1_500_000 to 5_000_000 in steps of
    500_000.  For each ``n``, ``j`` runs from 5 to 50 in steps of 5.
    """
    j_values = range(5, 55, 5)
    n_values = [
        *(10**exponent for exponent in range(1, 6)),
        *range(200_000, 1_100_000, 100_000),
        *range(1_500_000, 5_500_000, 500_000),
    ]
    # product() varies j fastest, matching the original three nested sweeps.
    for n, j in product(n_values, j_values):
        run_benches(n, j)
## I just run this line by line in VSC
def interactive():
    """Load every benchmark result and build the two comparison plots.

    Reads all ``*.json`` files in ``./benchmarks``, reduces each per-strategy
    struct column to its ``timing`` field, drops the ``inconsistent`` column
    when present, and sorts by ``n`` then ``j``.  Two line charts of timing
    against ``n`` are built for ``j == 50``: one with the three fastest
    strategies on a linear axis, one with all five on a log axis.

    Returns:
        A ``(fig_fast, fig_all)`` tuple of the two plotly figures, so they
        stay usable when this is called as a function instead of being run
        line by line.
    """
    import os
    import plotly.express as px

    results = pl.concat(
        [
            pl.read_json(f"./benchmarks/{x}")
            for x in os.listdir("./benchmarks")
            # Skip stray non-result files that read_json could not parse.
            if x.endswith(".json")
        ],
        # Files differ in columns (e.g. map_elem is skipped for large n).
        how="diagonal_relaxed",
    )
    results = results.select(
        pl.col(x).struct.field("timing").alias(x)
        if isinstance(y, pl.Struct)
        else pl.col(x)
        for x, y in results.schema.items()
    )
    # "inconsistent" only exists if at least one run recorded a mismatch;
    # dropping it unconditionally would raise when every run agreed.
    if "inconsistent" in results.columns:
        results = results.drop("inconsistent")
    results = results.sort(["n", "j"])

    at_j50 = results.filter(j=50)
    fig_fast = px.line(
        at_j50,
        x="n",
        y=["list_eval", "filter", "set_diff"],
    )
    fig_all = px.line(
        at_j50,
        x="n",
        y=["map_elem", "gather", "list_eval", "filter", "set_diff"],
        log_y=True,
    )
    return fig_fast, fig_all
# Run the full benchmark sweep only when executed as a script, not on import.
if __name__ == "__main__":
    main()
list_eval map_elem gather filter set_diff n j
0.0003058910369873047 0.0035696029663085938 0.0027608871459960938 0.004587650299072266 0.0008900165557861328 10 5
0.0006973743438720703 0.00043487548828125 0.0009138584136962891 0.0009729862213134766 0.000202178955078125 10 10
0.0012056827545166016 0.00044417381286621094 0.001874685287475586 0.0015718936920166016 0.0004546642303466797 10 15
0.0010089874267578125 0.00037980079650878906 0.001474142074584961 0.0015406608581542969 0.00032329559326171875 10 20
0.0012929439544677734 0.00035262107849121094 0.0018665790557861328 0.0016911029815673828 0.00027298927307128906 10 25
0.0011153221130371094 0.00034999847412109375 0.0017681121826171875 0.0014374256134033203 0.0002987384796142578 10 30
0.0012254714965820312 0.0003757476806640625 0.0019495487213134766 0.0013806819915771484 0.00031495094299316406 10 35
0.001129150390625 0.0003845691680908203 0.0019249916076660156 0.0015635490417480469 0.00029730796813964844 10 40
0.0011637210845947266 0.0003521442413330078 0.0020377635955810547 0.001482248306274414 0.0003292560577392578 10 45
0.0019025802612304688 0.0005764961242675781 0.0024755001068115234 0.002012491226196289 0.0003399848937988281 10 50
0.0015108585357666016 0.0019495487213134766 0.005085468292236328 0.002294301986694336 0.0006985664367675781 100 5
0.0033097267150878906 0.0013899803161621094 0.005503177642822266 0.002210855484008789 0.0003147125244140625 100 10
0.002521991729736328 0.0015912055969238281 0.005087137222290039 0.0020198822021484375 0.0003578662872314453 100 15
0.0021173954010009766 0.0016531944274902344 0.0041124820709228516 0.0022652149200439453 0.0004200935363769531 100 20
0.0012094974517822266 0.0017170906066894531 0.00534820556640625 0.0020897388458251953 0.0003974437713623047 100 25
0.0017800331115722656 0.0017368793487548828 0.004820823669433594 0.001874685287475586 0.00041604042053222656 100 30
0.002555370330810547 0.0018496513366699219 0.004726886749267578 0.0026476383209228516 0.0004253387451171875 100 35
0.0025146007537841797 0.001955747604370117 0.0054285526275634766 0.0021791458129882812 0.0003826618194580078 100 40
0.0020482540130615234 0.0019550323486328125 0.004194736480712891 0.0020682811737060547 0.00038695335388183594 100 45
0.0023729801177978516 0.0020635128021240234 0.004250764846801758 0.002125978469848633 0.00032973289489746094 100 50
0.002405881881713867 0.011301279067993164 0.007578849792480469 0.0025510787963867188 0.0004899501800537109 1000 5
0.0023865699768066406 0.012541055679321289 0.007842779159545898 0.0023674964904785156 0.0006079673767089844 1000 10
0.0017426013946533203 0.01348257064819336 0.008896589279174805 0.0026068687438964844 0.0008215904235839844 1000 15
0.002714395523071289 0.014424800872802734 0.009620189666748047 0.0025382041931152344 0.0008647441864013672 1000 20
0.0020089149475097656 0.014636039733886719 0.009228229522705078 0.002992391586303711 0.0009636878967285156 1000 25
0.0029261112213134766 0.01540684700012207 0.009951353073120117 0.0024383068084716797 0.0009891986846923828 1000 30
0.0034096240997314453 0.01735663414001465 0.010614156723022461 0.002742290496826172 0.001127481460571289 1000 35
0.003414154052734375 0.017011642456054688 0.011775732040405273 0.002819061279296875 0.0013082027435302734 1000 40
0.002290487289428711 0.01690959930419922 0.011354684829711914 0.004061460494995117 0.001276254653930664 1000 45
0.0029859542846679688 0.01729106903076172 0.011580705642700195 0.003548860549926758 0.0013377666473388672 1000 50
0.0050847530364990234 0.11308908462524414 0.052410125732421875 0.0050945281982421875 0.0017619132995605469 10000 5
0.006773710250854492 0.11309814453125 0.05490231513977051 0.006292819976806641 0.0028934478759765625 10000 10
0.008968353271484375 0.1173698902130127 0.058214426040649414 0.00735926628112793 0.003995656967163086 10000 15
0.008832931518554688 0.12348818778991699 0.0579073429107666 0.008041858673095703 0.005208015441894531 10000 20
0.008797168731689453 0.1309680938720703 0.06004667282104492 0.008798837661743164 0.006229400634765625 10000 25
0.011563777923583984 0.14484357833862305 0.06539249420166016 0.009204387664794922 0.006708621978759766 10000 30
0.01167154312133789 0.14609432220458984 0.06847906112670898 0.010996818542480469 0.007661342620849609 10000 35
0.013521909713745117 0.1516726016998291 0.06888270378112793 0.012316465377807617 0.008860111236572266 10000 40
0.014079093933105469 0.15782713890075684 0.0719757080078125 0.011461734771728516 0.01039743423461914 10000 45
0.014083623886108398 0.1670217514038086 0.07868552207946777 0.011887788772583008 0.011754035949707031 10000 50
0.03662848472595215 1.0443735122680664 0.47254157066345215 0.02165961265563965 0.015370607376098633 100000 5
0.04747653007507324 1.17653489112854 0.5179128646850586 0.02978801727294922 0.025055408477783203 100000 10
0.05812382698059082 1.232947587966919 0.556776762008667 0.037604331970214844 0.03780841827392578 100000 15
0.06632494926452637 1.2898163795471191 0.5837357044219971 0.044064998626708984 0.048483848571777344 100000 20
0.07448434829711914 1.3707177639007568 0.6629555225372314 0.05587196350097656 0.060358524322509766 100000 25
0.0898432731628418 1.397855281829834 0.647188663482666 0.058463335037231445 0.06584405899047852 100000 30
0.09840130805969238 1.516371726989746 0.6988534927368164 0.06763052940368652 0.07556915283203125 100000 35
0.10400938987731934 1.5703885555267334 0.6829056739807129 0.07387351989746094 0.08420634269714355 100000 40
0.11641502380371094 1.6494402885437012 0.7425918579101562 0.0799875259399414 0.0964670181274414 100000 45
0.1215062141418457 1.7109780311584473 0.7801945209503174 0.09115839004516602 0.10499238967895508 100000 50
0.0187532901763916 0.5513198375701904 0.2551004886627197 0.011800050735473633 0.005629062652587891 200000 1
0.0426793098449707 1.2439353466033936 0.5836501121520996 0.021267175674438477 0.014157772064208984 200000 2
0.05912280082702637 1.7167065143585205 0.829826831817627 0.032924652099609375 0.02049732208251953 200000 3
0.07011103630065918 1.970088243484497 0.9299392700195312 0.035378456115722656 0.026348590850830078 200000 4
0.07534146308898926 2.1351044178009033 1.0035107135772705 0.039574623107910156 0.030529499053955078 200000 5
0.09314608573913574 2.4091577529907227 1.1098566055297852 0.057605743408203125 0.05662870407104492 200000 10
0.11988615989685059 2.544966220855713 1.1399343013763428 0.09227752685546875 0.07932686805725098 200000 15
0.12927913665771484 2.714515209197998 1.2210619449615479 0.08713531494140625 0.0932457447052002 200000 20
0.14887285232543945 2.91017746925354 1.338043212890625 0.10830998420715332 0.12752223014831543 200000 25
0.19503426551818848 3.0453364849090576 1.3476014137268066 0.14337587356567383 0.13786101341247559 200000 30
0.19773197174072266 3.2983529567718506 1.5082032680511475 0.13189148902893066 0.15948104858398438 200000 35
0.2193605899810791 3.503009796142578 1.5871539115905762 0.1596672534942627 0.18218541145324707 200000 40
0.26112985610961914 3.4937169551849365 1.4671661853790283 0.16500592231750488 0.19510602951049805 200000 45
0.2458493709564209 3.599932909011841 1.5380635261535645 0.171156644821167 0.21190094947814941 200000 50
0.10674238204956055 3.4103169441223145 1.6358745098114014 0.06503129005432129 0.051931142807006836 300000 5
0.18442201614379883 3.8844797611236572 1.6789817810058594 0.09222793579101562 0.07832527160644531 300000 10
0.17657828330993652 4.091902494430542 1.884824514389038 0.11269259452819824 0.12147903442382812 300000 15
0.20893239974975586 4.071482181549072 1.9356465339660645 0.128432035446167 0.15677523612976074 300000 20
0.23235583305358887 4.331322431564331 1.9512650966644287 0.1502993106842041 0.17764902114868164 300000 25
0.2726166248321533 4.803229093551636 2.0982937812805176 0.1746816635131836 0.20998215675354004 300000 30
0.29155421257019043 5.218661069869995 2.112912178039551 0.20274662971496582 0.2387247085571289 300000 35
0.46607279777526855 5.142688274383545 2.3496932983398438 0.2106635570526123 0.27268218994140625 300000 40
0.4171171188354492 5.242428779602051 2.301084280014038 0.22965025901794434 0.2875845432281494 300000 45
0.3820459842681885 5.504771709442139 2.4000322818756104 0.2567288875579834 0.319779634475708 300000 50
0.14282965660095215 4.782256603240967 2.2065868377685547 0.0795595645904541 0.0620121955871582 400000 5
0.1880643367767334 4.96095871925354 2.2914607524871826 0.11069583892822266 0.11432766914367676 400000 10
0.2511777877807617 5.318471193313599 2.3792591094970703 0.14272522926330566 0.14477038383483887 400000 15
0.2766852378845215 5.87774920463562 2.6678223609924316 0.18324613571166992 0.19058918952941895 400000 20
0.2941274642944336 5.984944581985474 2.6480135917663574 0.197465181350708 0.22492623329162598 400000 25
0.41121482849121094 6.249768257141113 2.736908435821533 0.23229479789733887 0.2715933322906494 400000 30
0.5178122520446777 7.428604364395142 3.2490062713623047 0.296506404876709 0.35167813301086426 400000 35
0.4941227436065674 6.766913890838623 2.9750306606292725 0.277482271194458 0.35622167587280273 400000 40
0.46323585510253906 7.079568386077881 3.0339908599853516 0.336841344833374 0.3936953544616699 400000 45
0.5812158584594727 8.302779197692871 3.578207015991211 0.362276554107666 0.4676358699798584 400000 50
0.18697547912597656 5.684703826904297 2.6151859760284424 0.10345458984375 0.08282995223999023 500000 5
0.24095702171325684 6.099334001541138 2.9326045513153076 0.1389484405517578 0.13433575630187988 500000 10
0.28505969047546387 6.841082572937012 3.4073843955993652 0.19436240196228027 0.21895051002502441 500000 15
0.32920265197753906 7.210626602172852 3.1596157550811768 0.20749926567077637 0.23917698860168457 500000 20
0.41687583923339844 7.463999509811401 3.3857595920562744 0.23855805397033691 0.29387855529785156 500000 25
0.4539003372192383 7.825607061386108 3.4552700519561768 0.2812657356262207 0.33819031715393066 500000 30
0.5227677822113037 8.463021993637085 3.7018556594848633 0.33322572708129883 0.39049816131591797 500000 35
0.6101446151733398 8.630357265472412 3.8032689094543457 0.4119448661804199 0.4439706802368164 500000 40
0.6346924304962158 9.267220258712769 4.072887420654297 0.3944535255432129 0.5096585750579834 500000 45
0.6755454540252686 9.64454984664917 4.087319612503052 0.4799625873565674 0.5553164482116699 500000 50
0.21784520149230957 6.635578393936157 3.136085271835327 0.11472415924072266 0.09233713150024414 600000 5
0.2895474433898926 7.510256290435791 3.515533208847046 0.16349577903747559 0.16831398010253906 600000 10
0.3588259220123291 7.97222900390625 3.6330995559692383 0.22342491149902344 0.23523569107055664 600000 15
0.4060509204864502 8.462795495986938 3.707643985748291 0.2457256317138672 0.27774500846862793 600000 20
0.5135014057159424 8.881663084030151 4.134042024612427 0.29041528701782227 0.34679198265075684 600000 25
0.5526971817016602 9.444444417953491 4.20079493522644 0.3491075038909912 0.3952009677886963 600000 30
0.6348087787628174 10.137878179550171 4.53521203994751 0.3989393711090088 0.48314619064331055 600000 35
0.6986052989959717 10.621011734008789 4.596702337265015 0.4532766342163086 0.5492391586303711 600000 40
0.7607805728912354 11.082736253738403 5.0153350830078125 0.46038269996643066 0.5889432430267334 600000 45
0.8865764141082764 11.426913738250732 5.377993822097778 0.514094352722168 0.6676816940307617 600000 50
0.2630441188812256 8.615543842315674 3.693018913269043 0.13323760032653809 0.10773491859436035 700000 5
0.35546064376831055 8.712575197219849 3.9759926795959473 0.20519495010375977 0.18000388145446777 700000 10
0.398911714553833 9.11710238456726 4.093834161758423 0.24604225158691406 0.2754952907562256 700000 15
0.4549894332885742 9.754407405853271 4.49139404296875 0.29343461990356445 0.32321739196777344 700000 20
0.5242867469787598 10.599454402923584 4.679348707199097 0.34205007553100586 0.3931617736816406 700000 25
0.6446354389190674 11.186030149459839 5.158297300338745 0.38741183280944824 0.4721043109893799 700000 30
0.7348883152008057 11.514493227005005 5.214662551879883 0.4250938892364502 0.542668342590332 700000 35
0.8456590175628662 12.515820741653442 5.48298716545105 0.48569369316101074 0.6452333927154541 700000 40
0.9062638282775879 13.230853080749512 5.569413900375366 0.5591232776641846 0.7474963665008545 700000 45
1.0951249599456787 13.290188550949097 5.9261932373046875 0.6033656597137451 0.7802834510803223 700000 50
0.3006765842437744 9.625425577163696 4.187897443771362 0.1571958065032959 0.12427663803100586 800000 5
0.38442373275756836 10.197972536087036 4.5809385776519775 0.22846722602844238 0.2203669548034668 800000 10
0.4796264171600342 10.963968753814697 4.9616920948028564 0.28539371490478516 0.32684874534606934 800000 15
0.5408437252044678 11.44904088973999 5.063884258270264 0.3380582332611084 0.38109397888183594 800000 20
0.6464614868164062 12.419625759124756 5.388000726699829 0.3958768844604492 0.45777010917663574 800000 25
0.805851936340332 13.71467399597168 5.642616033554077 0.45508527755737305 0.5815324783325195 800000 30
0.8722639083862305 14.15036678314209 5.976379156112671 0.5349736213684082 0.6244969367980957 800000 35
0.9926395416259766 14.458184957504272 6.107063055038452 0.5548074245452881 0.7674961090087891 800000 40
1.059497356414795 15.095672607421875 6.50857400894165 0.6302790641784668 0.7974319458007812 800000 45
1.1353869438171387 15.691912174224854 6.498384237289429 0.6721963882446289 0.899198055267334 800000 50
0.3442535400390625 11.50998330116272 5.786457300186157 0.19279074668884277 0.14927434921264648 900000 5
0.4914999008178711 11.818665504455566 5.251668930053711 0.24283075332641602 0.25625181198120117 900000 10
0.5426340103149414 12.790624618530273 5.4049599170684814 0.32893896102905273 0.347301721572876 900000 15
0.5973477363586426 12.953245162963867 5.762306213378906 0.3647918701171875 0.427213191986084 900000 20
0.7205832004547119 13.902679920196533 6.304142951965332 0.4921848773956299 0.5686235427856445 900000 25
0.8498704433441162 14.648099899291992 6.820252418518066 0.6206092834472656 0.633887529373169 900000 30
0.9643218517303467 15.491421699523926 6.650310516357422 0.5905420780181885 0.7440710067749023 900000 35
1.018357276916504 15.450956344604492 6.739043712615967 0.6473352909088135 0.8664493560791016 900000 40
1.248443841934204 16.959635496139526 6.917187929153442 0.7201731204986572 0.9233090877532959 900000 45
1.403332233428955 17.257381677627563 7.523178339004517 0.8649828433990479 1.0274701118469238 900000 50
0.40727972984313965 12.023508548736572 5.443192958831787 0.19211792945861816 0.15868902206420898 1000000 5
0.5305178165435791 13.161483526229858 6.0279154777526855 0.28882408142089844 0.2844874858856201 1000000 10
0.5822970867156982 13.129249572753906 6.105649709701538 0.34000134468078613 0.38373661041259766 1000000 15
0.6937270164489746 14.890564680099487 6.575379848480225 0.4329056739807129 0.5033669471740723 1000000 20
0.7773816585540771 15.492402076721191 6.950749158859253 0.5183086395263672 0.5980038642883301 1000000 25
0.9377760887145996 16.36484718322754 7.379507780075073 0.5818893909454346 0.7678296566009521 1000000 30
1.1535537242889404 18.077881574630737 7.621563911437988 0.6844296455383301 0.8405098915100098 1000000 35
1.1699094772338867 18.010520935058594 8.226751804351807 0.8575878143310547 0.9766383171081543 1000000 40
1.2081797122955322 18.11430048942566 7.851854562759399 0.8144299983978271 0.9940125942230225 1000000 45
1.4206912517547607 19.5051326751709 8.125325202941895 1.085237741470337 1.2057914733886719 1000000 50
0.5674328804016113 8.154977321624756 0.2777557373046875 0.2375640869140625 1500000 5
0.7198166847229004 8.923152446746826 0.3985931873321533 0.4140894412994385 1500000 10
0.893761396408081 8.994568586349487 0.5146143436431885 0.5803694725036621 1500000 15
0.9987597465515137 9.69135594367981 0.7078702449798584 0.7371163368225098 1500000 20
1.2749443054199219 10.523739576339722 0.7884507179260254 0.9500133991241455 1500000 25
1.479644536972046 11.505565166473389 0.9576103687286377 1.1703763008117676 1500000 30
1.557192087173462 11.728650331497192 1.055130958557129 1.2657108306884766 1500000 35
1.711704969406128 12.33794903755188 1.098801612854004 1.504199504852295 1500000 40
1.9058468341827393 12.731279134750366 1.316988468170166 1.6100919246673584 1500000 45
2.020297050476074 12.75241208076477 1.3684704303741455 1.8280689716339111 1500000 50
0.7828242778778076 11.062356233596802 0.37008142471313477 0.3337748050689697 2000000 5
0.9714515209197998 11.631509780883789 0.5251476764678955 0.5464987754821777 2000000 10
1.2903399467468262 12.401080131530762 0.7703204154968262 0.7792866230010986 2000000 15
1.3245594501495361 13.23576545715332 0.841073751449585 1.0177249908447266 2000000 20
1.655200481414795 14.751662015914917 1.087345838546753 1.3309471607208252 2000000 25
1.8569626808166504 15.004031658172607 1.2052526473999023 1.3936614990234375 2000000 30
2.1442954540252686 15.173534154891968 1.3027911186218262 1.5693118572235107 2000000 35
2.3093109130859375 16.166301250457764 1.6742689609527588 1.9131817817687988 2000000 40
2.5918900966644287 16.877101182937622 1.9743854999542236 2.276275157928467 2000000 45
2.847254753112793 17.9345703125 2.0809388160705566 2.7702748775482178 2000000 50
1.0804898738861084 14.840218305587769 0.4913818836212158 0.4163689613342285 2500000 5
1.279991626739502 15.255966186523438 0.7242376804351807 0.756105899810791 2500000 10
1.52510404586792 16.14608597755432 0.9320752620697021 1.0224905014038086 2500000 15
1.8944294452667236 18.17721724510193 1.2748997211456299 1.3383855819702148 2500000 20
2.1789121627807617 19.599281072616577 1.4728469848632812 1.8438222408294678 2500000 25
2.465121030807495 18.203420639038086 1.4691143035888672 1.963454008102417 2500000 30
2.5842747688293457 19.90189504623413 1.74428129196167 2.1789989471435547 2500000 35
3.014279842376709 20.82987642288208 2.0556840896606445 2.3748979568481445 2500000 40
3.1936933994293213 21.861979722976685 2.466902256011963 2.797136068344116 2500000 45
3.7169172763824463 22.07574963569641 2.7154057025909424 3.0970945358276367 2500000 50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment