Skip to content

Instantly share code, notes, and snippets.

@BalazsHoranyi
Created May 14, 2018 20:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BalazsHoranyi/89a72c051c4d25c66830b591f8c5e6e9 to your computer and use it in GitHub Desktop.
Save BalazsHoranyi/89a72c051c4d25c66830b591f8c5e6e9 to your computer and use it in GitHub Desktop.
# Build dense integer ids for every distinct user and item found in the
# on-disk 'interactions' stack, then persist both lookup tables.
interactions = da.from_npy_stack('interactions')
users = interactions[:, 0]
items = interactions[:, 1]

# Number of rows materialised by each .compute() call.
slicer = 10000000
n_slices = math.ceil(len(interactions) / slicer)

# Start from empty sets so the names exist even when there are no rows, and
# grow them in place: set.union() would copy the whole accumulated set on
# every slice.
user_set = set()
for i in tqdm(range(n_slices)):
    user_set.update(users[i * slicer:(i + 1) * slicer].compute())

item_set = set()
for i in tqdm(range(n_slices)):
    item_set.update(items[i * slicer:(i + 1) * slicer].compute())

# Ids follow set iteration order, so they are arbitrary; the pickled maps
# below are the only record of which raw value received which index.
user_id_map = {v: i for i, v in enumerate(user_set)}
item_id_map = {v: i for i, v in enumerate(item_set)}

with open('user_id_map.pkl', 'wb') as f:
    pickle.dump(user_id_map, f)
with open('item_id_map.pkl', 'wb') as f:
    pickle.dump(item_id_map, f)
def get_user(user):
    """Return a NumPy array holding the ``user_id_map`` index of each raw id.

    ``user`` is iterated once, in order. A raw id that is not a key of
    ``user_id_map`` raises ``KeyError``.
    """
    lookup = user_id_map.__getitem__
    return np.array(list(map(lookup, user)))
def get_item(item):
    """Return a NumPy array holding the ``item_id_map`` index of each raw id.

    ``item`` is iterated once, in order. A raw id that is not a key of
    ``item_id_map`` raises ``KeyError``.
    """
    lookup = item_id_map.__getitem__
    return np.array(list(map(lookup, item)))
def _map_in_slices(column, mapper, rows_per_slice):
    """Apply ``mapper`` to ``column`` one slice at a time; return a dask array.

    Each slice of ``rows_per_slice`` rows is computed, passed to ``mapper``,
    and wrapped as a single-chunk dask array. The pieces are concatenated
    once at the end, so the result is a dask array even when there is only
    one slice, and the graph is not nested one level deeper per slice.

    With an empty ``column`` there is nothing to concatenate and
    ``da.concatenate`` raises its own error for the empty list.
    """
    n_slices = math.ceil(len(column) / rows_per_slice)
    parts = []
    for i in tqdm(range(n_slices)):
        raw = column[i * rows_per_slice:(i + 1) * rows_per_slice].compute()
        mapped = mapper(raw)
        # One chunk per slice, matching the slice boundaries.
        parts.append(da.from_array(mapped, chunks=mapped.shape))
    return da.concatenate(parts)


# Re-open the stack without memory-mapping before the second pass.
interactions = da.from_npy_stack('interactions', mmap_mode=None)
users = interactions[:, 0]
items = interactions[:, 1]

user_mapped = _map_in_slices(users, get_user, slicer)
item_mapped = _map_in_slices(items, get_item, slicer)

da.to_npy_stack('users', user_mapped)
print('saving items')
da.to_npy_stack('items', item_mapped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment