Skip to content

Instantly share code, notes, and snippets.

@victorkohler
Last active June 12, 2019 20:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save victorkohler/11630be55ebfce4ad0674bb1983bc478 to your computer and use it in GitHub Desktop.
Save victorkohler/11630be55ebfce4ad0674bb1983bc478 to your computer and use it in GitHub Desktop.
def get_train_instances():
    """Build (user, item, label) triples with sampled negative interactions.

    Walks over the module-level ``uids`` / ``iids`` sequences in lockstep.
    Every observed (user, item) pair is emitted once with label 1 and is
    followed by ``num_neg`` randomly drawn items that the user has no
    observed pair with, each emitted with label 0.

    Candidate negatives are drawn as integers in ``[0, len(items))``, so
    this presumably assumes item ids are the contiguous range
    ``0 .. len(items) - 1`` -- TODO confirm against where ``items`` and
    ``iids`` are built.

    NOTE(review): sampling retries until an unseen pair is found; a user
    who has an observed pair with every candidate id would never terminate.

    Returns:
        user_input (list): The user id for every emitted interaction.
        item_input (list): The item id for every emitted interaction,
            positives and sampled negatives interleaved.
        labels (list): 1 for an observed interaction, 0 for a sampled one.
    """
    # Set of observed pairs for O(1) membership checks while sampling.
    observed = set(zip(uids, iids))
    user_input, item_input, labels = [], [], []

    for uid, positive_item in zip(uids, iids):
        # The observed interaction itself.
        user_input.append(uid)
        item_input.append(positive_item)
        labels.append(1)

        # num_neg sampled interactions the user has not been seen with.
        for _ in range(num_neg):
            while True:
                candidate = np.random.randint(len(items))
                if (uid, candidate) not in observed:
                    break
            user_input.append(uid)
            item_input.append(candidate)
            labels.append(0)

    return user_input, item_input, labels
def random_mini_batches(U, I, L, mini_batch_size=256):
    """Return a list of shuffled mini batches of a given size.

    The three inputs are shuffled together with ``shuffle`` (a name defined
    elsewhere in this file; presumably ``sklearn.utils.shuffle`` -- confirm)
    and then cut into consecutive slices of ``mini_batch_size`` elements.
    The last batch is shorter when ``len(U)`` is not a multiple of
    ``mini_batch_size``.

    Args:
        U (list): All users for every interaction.
        I (list): All items for every interaction.
        L (list): All labels for every interaction.
        mini_batch_size (int): Maximum number of interactions per batch.
            Must be a positive integer.

    Returns:
        mini_batches (list): A list of mini batches, each a tuple of
            batch users, batch items and batch labels
            [(u, i, l), (u, i, l) ...]

    Raises:
        ValueError: If ``mini_batch_size`` is not positive.
    """
    # Fail early with a clear message instead of a ZeroDivisionError (size 0)
    # or a silently wrong empty batch (negative size).
    if mini_batch_size <= 0:
        raise ValueError(
            "mini_batch_size must be a positive integer, got %r" % (mini_batch_size,)
        )

    shuffled_U, shuffled_I, shuffled_L = shuffle(U, I, L)

    # A single strided loop yields every complete batch followed by the
    # (possibly shorter) remainder batch; slicing past the end is clamped.
    mini_batches = []
    for start in range(0, len(U), mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append(
            (shuffled_U[start:stop], shuffled_I[start:stop], shuffled_L[start:stop])
        )
    return mini_batches
def get_hits(k_ranked, holdout):
    """Return 1 if ``holdout`` equals any element of ``k_ranked``, else 0.

    Args:
        k_ranked (iterable): The ranked items to search.
        holdout: The item to look for; compared with ``==``.

    Returns:
        int: 1 on a hit, 0 otherwise (including for an empty ``k_ranked``).
    """
    # any() short-circuits on the first match, like the explicit loop did.
    return int(any(item == holdout for item in k_ranked))
def eval_rating(idx, test_ratings, test_negatives, K):
    """Score one test user's candidates and check the holdout is in the top K.

    The candidate list is the negatives for ``idx`` followed by the holdout
    item. All candidates are scored in one ``session.run`` call on
    ``output_layer``, fed through the ``user`` and ``item`` graph inputs
    (all four are module-level names defined elsewhere in this file).

    Args:
        idx (int): Position of the test pair to evaluate.
        test_ratings (list): Test set (user, item) pairs; the item of the
            pair at ``idx`` is the holdout.
        test_negatives (list): For each test pair, a list of negative
            items. It is not modified.
        K (int): Number of top recommendations.

    Returns:
        int: 1 if the holdout appeared in the top K predicted items,
            0 if not.
    """
    user_idx = test_ratings[idx][0]
    # The item of the test pair is our holdout item.
    holdout = test_ratings[idx][1]

    # Work on a copy: appending to test_negatives[idx] directly would leave
    # the holdout inside the caller's list after this call returns.
    items = list(test_negatives[idx])
    items.append(holdout)

    # Prepare our user and item column vectors for tensorflow.
    predict_user = np.full(len(items), user_idx, dtype='int32').reshape(-1, 1)
    np_items = np.array(items).reshape(-1, 1)

    # Feed user and items into the TF graph and flatten the scores to a list.
    predictions = session.run(
        [output_layer], feed_dict={user: predict_user, item: np_items}
    )
    scores = predictions[0].flatten().tolist()

    # Map item id to predicted score (a repeated id keeps its last score).
    map_item_score = dict(zip(items, scores))

    # Get the K highest ranked items as a list.
    k_ranked = heapq.nlargest(K, map_item_score, key=map_item_score.get)

    return get_hits(k_ranked, holdout)
def evaluate(df_neg, K=10):
    """Collect a top-K hit flag for every holdout pair in the test set.

    The holdout (user, item) pairs are read from the ``user_id`` and
    ``item_id`` columns of the module-level ``df_test`` dataframe. Each
    pair is passed to ``eval_rating`` together with the negatives found
    at the same row position of ``df_neg``.

    Args:
        df_neg (dataframe): One row per holdout pair. The first column is
            dropped and the remaining values of each row are used as that
            pair's negative items.
        K (int): The 'K' number of ranked predictions we want
            our holdout item to be present in.

    Returns:
        hits (list): One ``eval_rating`` result per holdout pair, in
            ``df_test`` row order: 1 if the holdout was present in the K
            highest ranked predictions, 0 if not.
    """
    holdout_pairs = list(
        zip(
            df_test['user_id'].values.tolist(),
            df_test['item_id'].values.tolist(),
        )
    )

    # Drop the first column; what remains are the negative items per row.
    negatives_per_pair = df_neg.drop(df_neg.columns[0], axis=1).values.tolist()

    return [
        eval_rating(position, holdout_pairs, negatives_per_pair, K)
        for position in range(len(holdout_pairs))
    ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment