Skip to content

Instantly share code, notes, and snippets.

@pbruneau
Last active November 2, 2021 13:49
Show Gist options
  • Save pbruneau/53312fe4bef649b7780f8f63bf09fbe1 to your computer and use it in GitHub Desktop.
Save pbruneau/53312fe4bef649b7780f8f63bf09fbe1 to your computer and use it in GitHub Desktop.
cudf with pandas MultiIndex
from datetime import datetime
import pandas as pd
import cudf
import numpy as np
# Case 1: a frame indexed by a plain DatetimeIndex.
# The '%z' directive makes the parsed datetimes timezone-aware (+0000 offset).
stamp_format = '%Y-%m-%d %H:%M%z'
start = pd.Timestamp(datetime.strptime('2021-03-12 00:00+0000', stamp_format))
end = pd.Timestamp(datetime.strptime('2021-03-12 11:00+0000', stamp_format))
# 00:00 through 11:00 in hourly steps gives 12 rows, matching size=12 below.
timestamps = pd.date_range(start=start, end=end, freq='1H')
value = np.random.normal(size=12)
df = pd.DataFrame(data=value, index=timestamps, columns=['value'])

# Scalar timestamp lookup on the pandas frame.
# SUCCEEDS
stamp = pd.Timestamp(datetime.strptime('2021-03-12 03:00+0000', stamp_format))
cpu_row = df.loc[stamp]
print(cpu_row)

# Same lookup after converting the frame with cudf.from_pandas.
# SUCCEEDS
df_gpu = cudf.from_pandas(df)
gpu_row = df_gpu.loc[stamp]
print(gpu_row)
# Case 2: a frame indexed by a (timestamp, label) MultiIndex.
stamp_format = '%Y-%m-%d %H:%M%z'
start = pd.Timestamp(datetime.strptime('2021-03-12 00:00+0000', stamp_format))
end = pd.Timestamp(datetime.strptime('2021-03-12 03:00+0000', stamp_format))
timestamps = pd.date_range(start=start, end=end, freq='1H')
labels = ['A', 'B', 'C']
# Cartesian product of 4 hourly timestamps and 3 labels: 12 rows, matching size=12.
index = pd.MultiIndex.from_product(
    [timestamps, labels],
    names=["timestamp", "label"],
)
value = np.random.normal(size=12)
df = pd.DataFrame(data=value, index=index, columns=['value'])

# Select by the first index level only, using a bare scalar key, on pandas.
# SUCCEEDS
stamp = pd.Timestamp(datetime.strptime('2021-03-12 02:00+0000', stamp_format))
cpu_rows = df.loc[stamp]
print(cpu_rows)

# The same bare scalar key against the cuDF copy of the frame.
# NOTE(review): the author marks this lookup as failing; if it raises, the
# statements below it do not run when the script is executed top to bottom.
# FAILS
df_gpu = cudf.from_pandas(df)
gpu_rows = df_gpu.loc[stamp]
print(gpu_rows)

# Wrapping the scalar in a one-element tuple is the author's workaround.
# WORKAROUND
gpu_rows_via_tuple = df_gpu.loc[(stamp,)]
print(gpu_rows_via_tuple)
# Case 3: select several first-level keys at once from the MultiIndex frames
# (`df` and `df_gpu`) built in the previous section.
stamp_format = '%Y-%m-%d %H:%M%z'
start = pd.Timestamp(datetime.strptime('2021-03-12 01:00+0000', stamp_format))
end = pd.Timestamp(datetime.strptime('2021-03-12 02:00+0000', stamp_format))
# Two hourly keys: 01:00 and 02:00.
timestamps = pd.date_range(start=start, end=end, freq='1H')

# pandas accepts the DatetimeIndex directly as the key.
# SUCCEEDS
cpu_rows = df.loc[timestamps]
print(cpu_rows)

# NOTE(review): the author marks both cuDF lookups below as failing; if the
# first one raises, the second is never reached in a top-to-bottom run.
# FAILS
gpu_rows = df_gpu.loc[timestamps]
print(gpu_rows)

# The one-element-tuple form that worked for a scalar key, applied here.
# ALSO FAILS
gpu_rows_via_tuple = df_gpu.loc[(timestamps,)]
print(gpu_rows_via_tuple)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment