Skip to content

Instantly share code, notes, and snippets.

@axil
Created January 13, 2023 11:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save axil/7a731de5e1f5eb8c1d60fca7d474bed6 to your computer and use it in GitHub Desktop.
Save axil/7a731de5e1f5eb8c1d60fca7d474bed6 to your computer and use it in GitHub Desktop.
bench-factorize
from contextlib import contextmanager
from time import perf_counter as clock
from itertools import count
import numpy as np
import pandas as pd
def f1(a):
    """Encode the values of ``a`` as integer codes via ``factorize()``.

    Parameters
    ----------
    a : pandas.Series
        Any object exposing a pandas-style ``factorize()`` method works;
        the benchmark driver passes a ``pd.Series``.

    Returns
    -------
    numpy.ndarray
        The codes array, i.e. the first element of the tuple returned by
        ``a.factorize()``.
    """
    # Operate on the argument itself. The previous body read a module-level
    # variable named ``s`` and silently ignored whatever was passed in, which
    # only worked by accident when the caller's global happened to match.
    codes, _uniques = a.factorize()
    return codes
def f2(s):
    """Encode the values of ``s`` as integer codes using ``groupby``/``ngroup``.

    Parameters
    ----------
    s : pandas.Series
        Series that is grouped by its own values.

    Returns
    -------
    numpy.ndarray
        The ``.values`` of the per-row group numbers produced by
        ``ngroup()``.
    """
    # sort=False keeps groups in encounter order instead of sorting the keys,
    # so the group numbers follow the order in which values first occur.
    by_value = s.groupby(s, sort=False)
    return by_value.ngroup().values
def f3(s):
    """Encode the values of ``s`` as integer codes using ``np.unique``.

    Parameters
    ----------
    s : pandas.Series
        Only ``s.values`` is used.

    Returns
    -------
    numpy.ndarray
        For each element, the rank of its value when the distinct values are
        ordered by the position of their first occurrence.
    """
    # np.unique returns the distinct values sorted, together with the index of
    # each value's first occurrence and, per element, the index of its value
    # in that sorted array.
    _, first_seen, inverse = np.unique(
        s.values, return_index=True, return_inverse=True
    )
    # A double argsort converts the first-occurrence positions into ranks:
    # rank[k] is the encounter order of the k-th sorted distinct value.
    rank = first_seen.argsort().argsort()
    # Map every element from "index into sorted uniques" to encounter rank.
    return rank[inverse]
@contextmanager
def bench(r):
    """Time the enclosed ``with`` body and append the elapsed time to ``r``.

    Parameters
    ----------
    r : list
        Mutable sequence; one float (``clock()`` difference, i.e. seconds
        from ``time.perf_counter``) is appended when the body finishes.

    Notes
    -----
    If the body raises, the exception propagates from the ``yield`` and no
    timing is appended.
    """
    started = clock()
    yield
    finished = clock()
    r.append(finished - started)
# Benchmark driver: time f1, f2 and f3 on random integer Series of doubling
# length and save the best timing per function and size to 'results.npy'.
res = []  # one row per input size: [min f1 time, min f2 time, min f3 time]
for i in count():
    n = 2**i
    # n random integers drawn from [0, n), wrapped in a Series.
    a = np.random.randint(0, n, n)
    s = pd.Series(a)
    rr = []  # timings for this size, one [f1, f2, f3] list per repetition
    for j in range(5):
        r = []
        # Printing happens outside each `with` so it is not part of the
        # measured interval; `r` grows by one entry per timed call.
        with bench(r):
            a1 = f1(s)
        print(r)
        with bench(r):
            a2 = f2(s)
        print(r)
        with bench(r):
            a3 = f3(s)
        print(r)
        rr.append(r)
        # Stop repeating this size once any single call takes over 0.5 s.
        if max(r) > 0.5:
            break
    # Keep the fastest observation of each function across the repetitions.
    res.append(np.min(rr, axis=0))
    # Stop growing the input once the slowest call at this size exceeds 0.4 s.
    if np.max(rr) > 0.4:
        break
np.save('results.npy', np.array(res))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment