Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Open the 'interactions' npy stack as a dask array; column 0 is treated as
# the user id and column 1 as the item id.
interactions = da.from_npy_stack('interactions')
users = interactions[:, 0]
items = interactions[:, 1]

# Number of rows materialised in memory per step.
slicer = 10000000


def _distinct_values(column, chunk_rows):
    """Return the set of distinct values found in ``column``.

    ``column`` is scanned in consecutive slices of at most ``chunk_rows``
    rows; each slice is materialised with ``.compute()`` and merged into a
    single set. An empty column yields an empty set.
    """
    seen = set()
    for start in tqdm(range(0, len(column), chunk_rows)):
        # update() merges in place; union() would copy the whole
        # accumulated set again on every slice.
        seen.update(column[start:start + chunk_rows].compute())
    return seen


user_set = _distinct_values(users, slicer)
item_set = _distinct_values(items, slicer)

# Assign each distinct raw id a contiguous integer index. The numbering
# follows set iteration order, so it is arbitrary; the pickled maps below are
# the only record of which raw id received which index.
user_id_map = {raw_id: index for index, raw_id in enumerate(user_set)}
item_id_map = {raw_id: index for index, raw_id in enumerate(item_set)}

with open('user_id_map.pkl', 'wb') as f:
    pickle.dump(user_id_map, f)
with open('item_id_map.pkl', 'wb') as f:
    pickle.dump(item_id_map, f)
def get_user(user):
    """Translate raw user ids into their indices from ``user_id_map``.

    ``user`` is any iterable of raw ids. The result is a NumPy array holding
    ``user_id_map[x]`` for each ``x``, in the same order. An id that is not a
    key of ``user_id_map`` raises ``KeyError``.
    """
    lookup = user_id_map.__getitem__
    return np.array(list(map(lookup, user)))
def get_item(item):
    """Translate raw item ids into their indices from ``item_id_map``.

    ``item`` is any iterable of raw ids. The result is a NumPy array holding
    ``item_id_map[x]`` for each ``x``, in the same order. An id that is not a
    key of ``item_id_map`` raises ``KeyError``.
    """
    lookup = item_id_map.__getitem__
    return np.array(list(map(lookup, item)))
# Re-open the stack with mmap_mode=None for the remapping pass.
interactions = da.from_npy_stack('interactions', mmap_mode=None)
users = interactions[:, 0]
items = interactions[:, 1]


def _remap_column(column, translate, chunk_rows):
    """Apply ``translate`` to ``column`` slice by slice and return a dask array.

    ``column`` is read in consecutive slices of at most ``chunk_rows`` rows.
    Each slice is materialised with ``.compute()``, passed through
    ``translate`` (which must return a NumPy array), and kept as one chunk of
    the result.

    Raises:
        ValueError: if ``column`` has no rows, so there is nothing to remap.
    """
    pieces = []
    for start in tqdm(range(0, len(column), chunk_rows)):
        mapped = translate(column[start:start + chunk_rows].compute())
        # Wrap every slice, including the first, as a dask array so the
        # result is a dask array even when the data fits in a single slice.
        pieces.append(da.from_array(mapped, chunks=mapped.shape))
    if not pieces:
        raise ValueError('interactions stack is empty; nothing to remap')
    # Concatenate once at the end rather than pairwise inside the loop, which
    # would nest the task graph one level deeper per slice.
    return da.concatenate(pieces)


user_mapped = _remap_column(users, get_user, slicer)
item_mapped = _remap_column(items, get_item, slicer)

da.to_npy_stack('users', user_mapped)
print('saving items')
da.to_npy_stack('items', item_mapped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.