Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
def to_dask_array(df):
    """Build a dask array with known chunk shapes from a partitioned frame.

    Adapted from
    https://stackoverflow.com/questions/37444943/dask-array-from-dataframe

    Every partition returned by ``df.to_delayed()`` becomes one chunk (its
    ``.values``), and the chunks are concatenated along axis 0.

    Args:
        df: An object exposing ``to_delayed()`` whose partitions expose
            ``.values`` (presumably a dask DataFrame or Series -- confirm
            against the caller).

    Returns:
        The result of ``da.concatenate`` over the per-partition arrays.

    Note:
        ``da`` and ``compute`` are expected to be in module scope
        (presumably ``dask.array`` and ``dask.compute``; their imports are
        not visible here). The call to ``compute`` below evaluates the
        partitions up front in order to learn each chunk's shape.
    """
    partitions = df.to_delayed()
    blocks = [part.values for part in partitions]
    shapes = [block.shape for block in blocks]
    # Use the dtype of the materialized values array. ``DataFrame.dtypes`` is
    # a per-column listing, not a single array dtype, so it is not a valid
    # ``dtype`` argument for ``da.from_delayed`` on multi-column frames.
    dtype = blocks[0].dtype
    results = compute(dtype, *shapes)  # trigger computation to find shapes
    dtype, shapes = results[0], results[1:]
    chunks = [
        da.from_delayed(block, shape, dtype)
        for block, shape in zip(blocks, shapes)
    ]
    return da.concatenate(chunks, axis=0)
# Columns that make up one interaction record.
interaction_columns = ['user_id', 'repo_id', 'created_at']

# Convert the selected columns into a chunked array, then write it out under
# the 'interactions' directory via dask's npy-stack writer.
interactions = to_dask_array(df[interaction_columns])
da.to_npy_stack('interactions', interactions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.