Skip to content

Instantly share code, notes, and snippets.

@isVoid
Created June 14, 2021 22:25
Show Gist options
  • Save isVoid/8eb9c806bafa36d1665d50a4dde63b04 to your computer and use it in GitHub Desktop.
Save isVoid/8eb9c806bafa36d1665d50a4dde63b04 to your computer and use it in GitHub Desktop.
Using cupy/numpy in get_loc
import cupy as cp
import pandas as pd
import cudf
def make_non_monotonic_non_unique_multiindex(N):
    """Build a shuffled two-level pandas MultiIndex with N rows.

    Level 0 holds the values ``0 .. N//2 - 1`` followed by the filler
    value 42; level 1 holds ``N//2 .. N - 1`` followed by the filler
    value 42.  Both levels are then reordered by the same random
    permutation, so the repeated 42s make the index non-unique and the
    shuffle makes it non-monotonic.

    Parameters
    ----------
    N : int
        Number of rows in the resulting index.  Odd values are supported:
        level 0 receives the one extra filler value needed to reach N.

    Returns
    -------
    pandas.MultiIndex
        Two-level index built from host copies of the shuffled arrays.
    """
    # Floor division is exact for integers (no round-trip through float).
    half_n = int(N // 2)
    # Level 0 needs N - half_n fillers so both levels have exactly N
    # entries even when N is odd; for even N this equals half_n.
    filler_n = int(N - half_n)

    l0 = cp.concatenate([cp.arange(0, half_n), cp.full(filler_n, 42)])
    l1 = cp.concatenate([cp.arange(half_n, N), cp.full(half_n, 42)])

    random_order = cp.arange(0, N)
    cp.random.shuffle(random_order)

    # Scatter into fresh buffers rather than onto the source array itself,
    # so the result does not depend on how reads and writes are ordered
    # when source and destination share memory.
    shuffled_l0 = cp.empty_like(l0)
    shuffled_l0[random_order] = l0
    shuffled_l1 = cp.empty_like(l1)
    shuffled_l1[random_order] = l1

    # .get() copies each device array back to host memory for pandas.
    return pd.MultiIndex.from_arrays([shuffled_l0.get(), shuffled_l1.get()])
# Seed CuPy's RNG so the shuffled indexes are reproducible across runs.
cp.random.seed(0)

# Key passed to get_loc on every index.
target = (42, 42)

# Index sizes 10**5 through 10**8, one order of magnitude per step.
for N in (10 ** exponent for exponent in range(5, 9)):
    print(N)
    pidx = make_non_monotonic_non_unique_multiindex(N)
    gidx = cudf.from_pandas(pidx)
    # The lookup result is intentionally discarded; only the call is exercised.
    gidx.get_loc(target)
    # Drop both indexes before the next, larger iteration builds its own.
    del pidx, gidx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment