Skip to content

Instantly share code, notes, and snippets.

@bigtonylewis
Last active May 27, 2020 13:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bigtonylewis/eb2913814869416ccbb82944c3662d32 to your computer and use it in GitHub Desktop.
Save bigtonylewis/eb2913814869416ccbb82944c3662d32 to your computer and use it in GitHub Desktop.
def test_pystore(n, nrows=1000, ncols=5):
    """Round-trip ``n`` batches of random data through pystore and compare.

    Builds ``n`` DataFrames of ``nrows`` rows each, consisting of an integer
    column ``'i'`` (random values 0-4) followed by ``ncols`` random float
    columns named ``col0``, ``col1``, ... Every frame is grouped by ``'i'``
    and each group is appended to the item ``'number<i>'`` in the collection
    ``'testcollection'`` of the store ``'teststore'`` under
    ``./test_pystore``; when ``append`` raises ``ValueError`` the group is
    written with ``write`` instead. All items are then read back,
    concatenated, sorted by the float columns and compared with the equally
    sorted in-memory data. The outcome is reported with ``print``.

    Any items already present in the collection are deleted first.

    Args:
        n: Number of DataFrames to generate and store; must be at least 1.
        nrows: Rows per generated DataFrame; must be at least 1.
        ncols: Number of random float columns per DataFrame.

    Returns:
        None. The result is only printed.

    Raises:
        ValueError: If ``n`` or ``nrows`` is smaller than 1.
    """
    # Fail early with a clear message, before touching the store: with no
    # frames (or no rows) there is nothing to concatenate or compare.
    if n < 1 or nrows < 1:
        raise ValueError(
            'n and nrows must both be >= 1, got n={}, nrows={}'.format(n, nrows))

    pystore.set_path('./test_pystore')
    store = pystore.store('teststore')
    c = store.collection('testcollection')

    # clean out any past test data
    for item in c.list_items():
        c.delete_item(item)

    # we use this list of column names a few times
    cols = ['col{}'.format(k) for k in range(ncols)]

    df_arr = []
    for _ in range(n):
        # create some random floats
        rands = pd.DataFrame(np.random.rand(nrows, ncols), columns=cols)
        # create a column of random ints so we can group by them later
        ints = pd.DataFrame(np.random.randint(0, 5, (nrows, 1)), columns=['i'])
        # save it
        df_arr.append(pd.concat([ints, rands], axis=1))
    print(df_arr[-1].shape)

    # go through each of the saved arrays
    for df in df_arr:
        print('next df from array')
        # group each df by the int column
        for i, dfi in df.groupby('i'):
            print(i, dfi.shape)
            # append each grouped DF to pystore; fall back to write() when
            # append() raises ValueError (presumably because the item does
            # not exist yet -- confirm against the pystore API)
            try:
                c.append('number{}'.format(i), dfi, npartitions=1)
            except ValueError:
                c.write('number{}'.format(i), dfi)

    # now read them all back in and concatenate them, and sort deterministically
    from_pystore = pd.concat(
        [c.item(item).to_pandas() for item in c.list_items()]
    ).sort_values(cols)
    # combine the array of dfs, sorting deterministically
    orig = pd.concat(df_arr).sort_values(cols)

    if from_pystore.shape != orig.shape:
        print('Sizes do not match: {} != {} when n={}'.format(from_pystore.shape, orig.shape, n))
        return
    if not from_pystore.equals(orig):
        print('Sorted dataframes are not equal when n={}'.format(n))
        return
    print('The dataframes are the same when n={}'.format(n))
# Demonstration driver: each entry pairs the banner to print with the number
# of DataFrames handed to test_pystore for that run.
demo_runs = (
    ('\n---------\nThis will work, n=1', 1),
    ('\n---------\nThis will fail, n=2', 2),
)
for banner, batch_count in demo_runs:
    print(banner)
    test_pystore(batch_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment