Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created August 16, 2012 20:29
ranking
def calculate_ranking(self, item_keys, values):
    '''
    Emit items with similarity in key for ranking:
    19,0.4 70,1
    19,0.6 21,2
    21,0.6 19,2
    21,0.9 70,1
    70,0.4 19,1
    70,0.9 21,1

    item_keys -- a pair (item_x, item_y).
    values -- a pair (similarity, n); n must be accepted by int().
              NOTE(review): n is presumably a co-occurrence count --
              confirm against the step that produces it.

    Yields a single ((item_x, similarity), (item_y, n)) record when
    int(n) > 0 and nothing otherwise.
    '''
    similarity, n = values
    item_x, item_y = item_keys
    # int() is used for the comparison only; n is emitted exactly as it
    # was received so the value is not altered on its way downstream.
    if int(n) > 0:
        yield (item_x, similarity), (item_y, n)
def top_similar_items(self, key_sim, similar_ns):
    '''
    For each item emit its similar items in comma separated file:
    De La Soul;A Tribe Called Quest;0.6;1
    De La Soul;2Pac;0.4;2

    key_sim -- a pair (item_x, similarity).
    similar_ns -- an iterable of (item_y, n) pairs.

    Writes one "item_x;item_y;similarity;n" line to standard output per
    pair in similar_ns (similarity formatted with %f, n with %d) and
    returns None. Every pair received is printed; no top-K cut-off is
    applied here.
    '''
    item_x, similarity = key_sim
    for item_y, n in similar_ns:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s;%s;%f;%d' % (item_x, item_y, similarity, n))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment