# Source: GitHub gist rmacy/4394410 by @rmacy, created December 28, 2012 04:16.
# Sample preference data: maps each critic's name to a mapping of
# movie title -> rating given by that critic. Not every critic has rated
# every movie.
# NOTE(review): 'Gene Seymoud' looks like a misspelling of 'Gene Seymour';
# the key is kept exactly as-is because lookups use this literal string.
critics = {
    'Lisa Rose': {
        'Lady in the water': 2.5,
        'Snakes on a plane': 3.5,
        'Just my luck': 3.0,
        'Superman returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The night listener': 3.0,
    },
    'Gene Seymoud': {
        'Lady in the water': 3.0,
        'Snakes on a plane': 3.5,
        'Just my luck': 3.0,
        'Superman returns': 5.0,
        'You, Me and Dupree': 3.5,
        'The night listener': 3.0,
    },
    'Michael Phillips': {
        'Lady in the water': 2.5,
        'Snakes on a plane': 3.0,
        'Superman returns': 3.5,
        'The night listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a plane': 3.5,
        'Just my luck': 3.0,
        'The night listener': 4.5,
        'Superman returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the water': 3.0,
        'Snakes on a plane': 4.0,
        'Just my luck': 2.0,
        'Superman returns': 3.0,
        'The night listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the water': 3.0,
        'Snakes on a plane': 4.0,
        'The night listener': 3.0,
        'Superman returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman returns': 4.0,
    },
}
from math import sqrt
def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Only items rated by both people take part in the comparison.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the
        two people's ratings over their shared items: identical ratings
        give 1.0 and the score shrinks toward 0 as the ratings diverge.
        Returns 0 when the two people have no items in common.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Items that both people have rated, in person1's iteration order.
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared
    )
    return 1 / (1 + sqrt(sum_of_squares))
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient of two people's ratings.

    Only items rated by both people take part in the computation.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        The correlation coefficient (nominally between -1 and 1) of the two
        rating series over the shared items.  Returns 0 when there are no
        shared items or when either person's shared ratings show no
        variation, since the coefficient is undefined in that case.

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Items that both people have rated, in p1's iteration order.
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    # A float count keeps the divisions below from truncating when every
    # rating is an integer and "/" is integer division (Python 2).
    n = float(len(shared))

    xs = [ratings1[item] for item in shared]
    ys = [ratings2[item] for item in shared]

    sum1 = sum(xs)
    sum2 = sum(ys)
    sum1_sq = sum(x * x for x in xs)
    sum2_sq = sum(y * y for y in ys)
    p_sum = sum(x * y for x, y in zip(xs, ys))

    num = p_sum - sum1 * sum2 / n
    # Each term is n times the variance of one series.  Mathematically it is
    # never negative, but floating-point cancellation can push it slightly
    # below zero, which would make sqrt() raise ValueError.  Treat any
    # non-positive term as "no variation".
    var1 = sum1_sq - sum1 * sum1 / n
    var2 = sum2_sq - sum2 * sum2 / n
    if var1 <= 0 or var2 <= 0:
        return 0

    den = sqrt(var1 * var2)
    if den == 0:
        # The product of two tiny positive terms can still underflow to 0.
        return 0
    return num / den
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the people most similar to ``person``, best match first.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person: Key in ``prefs`` of the person to find matches for; this
            person is excluded from the results.
        n: Maximum number of matches to return (applied as a slice bound).
        similarity: Callable ``similarity(prefs, person, other)`` returning
            a comparable score where larger means more similar.

    Returns:
        A list of ``(score, other_person)`` tuples sorted in descending
        order.  Because whole tuples are compared, equal scores are ordered
        by descending person key.
    """
    scores = [
        (similarity(prefs, person, other), other)
        for other in prefs
        if other != person
    ]
    scores.sort(reverse=True)
    return scores[:n]
# (End of gist content; the GitHub sign-up / sign-in page footer was here.)