Skip to content

Instantly share code, notes, and snippets.

@raliste
Created January 12, 2011 04:45
Show Gist options
  • Save raliste/775704 to your computer and use it in GitHub Desktop.
Save raliste/775704 to your computer and use it in GitHub Desktop.
Distancia euclidiana y coeficiente de correlación de Pearson. ¡Similitudes en conjuntos pequeños!
from math import sqrt
# Sample data set: maps each critic's name to a dict of
# {movie title: rating} for the movies that critic has rated.
critics = {
    'Rod': {
        'Superman': 1.0,
        'Forest Gump': 7.0,
        'Transformers': 2.5,
    },
    'Novia': {
        'Superman': 0.8,
        'Forest Gump': 6.5,
        'Transformers': 2.3,
        'XXY': 1.0,
        'Munich': 3.0,
    },
    'Jose': {
        'Superman': 3.5,
        'Forest Gump': 3.0,
        'Transformers': 2.5,
        'XXY': 4.0,
        'Munich': 5.0,
    },
    'Andres': {
        'Superman': 1.0,
        'Forest Gump': 2.0,
        'Transformers': 7.0,
        'XXY': 1.0,
        'Munich': 4.5,
    },
}
# Returns a Euclidean-distance-based similarity score for person1 and person2.
def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Only items rated by both ``person1`` and ``person2`` are compared.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``0`` when the two people have no rated item in common; otherwise
        ``1 / (1 + d)``, where ``d`` is the Euclidean distance between their
        ratings on the shared items. Identical ratings give ``1.0`` and the
        score shrinks towards ``0`` as the ratings diverge.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Items both people have rated, in person1's iteration order.
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared
    )
    # Adding 1 keeps the denominator non-zero when the distance is 0.
    return 1 / (1 + sqrt(sum_of_squares))
# Returns the Pearson correlation score for person1 and person2.
def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation score for two people.

    The coefficient is computed over the items rated by both ``person1``
    and ``person2``.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``0`` when the two people share no rated item, or when either
        person's ratings on the shared items are all the same (the
        coefficient is undefined there); otherwise the Pearson correlation
        coefficient of the two rating series.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Items both people have rated, in person1's iteration order.
    shared = [item for item in ratings1 if item in ratings2]
    n = len(shared)
    if n == 0:
        return 0

    xs = [ratings1[item] for item in shared]
    ys = [ratings2[item] for item in shared]

    # A constant series has zero variance, so the correlation is undefined.
    # Detect it exactly here: the sum-of-squares formula below can round a
    # true zero variance to a tiny negative value (sqrt would then raise
    # ValueError) or a tiny positive one (yielding a meaningless ratio).
    if min(xs) == max(xs) or min(ys) == max(ys):
        return 0

    # Add up all the ratings.
    sum1 = sum(xs)
    sum2 = sum(ys)
    # Sum up the squares.
    sum1_sq = sum(x ** 2 for x in xs)
    sum2_sq = sum(y ** 2 for y in ys)
    # Sum up the products.
    p_sum = sum(x * y for x, y in zip(xs, ys))

    # Pearson: scaled covariance over the square root of the product of
    # the scaled variances.
    num = p_sum - (sum1 * sum2 / n)
    var1 = sum1_sq - sum1 ** 2 / n
    var2 = sum2_sq - sum2 ** 2 / n
    # Cancellation can still push a near-zero variance to <= 0 for almost
    # constant ratings; treat that as "no correlation" instead of raising.
    if var1 <= 0 or var2 <= 0:
        return 0
    den = sqrt(var1 * var2)
    if den == 0:
        return 0
    return num / den
# Returns the best matches for person from the prefs dict.
# It calculates the similarity score for one user against all other users.
# It then sorts the scores in descending order (most similar first).
def topMatches(prefs, person, n=10, sim_algorithm=sim_pearson):
    """Return the people in ``prefs`` most similar to ``person``.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person: Key in ``prefs`` to find matches for; excluded from the result.
        n: Maximum number of matches to return (applied as the slice
            ``[:n]`` on the sorted list).
        sim_algorithm: Callable ``(prefs, person, other) -> score`` used to
            compare ``person`` with every other key of ``prefs``.

    Returns:
        A list of ``(score, other)`` tuples sorted in descending order.
        Tuples are compared as a whole, so equal scores are ordered by
        ``other``.
    """
    scores = sorted(
        (
            (sim_algorithm(prefs, person, other), other)
            for other in prefs
            if other != person
        ),
        reverse=True,
    )
    return scores[:n]
def getRecommendations(prefs, person, sim_algorithm=sim_pearson):
    """Rank the items ``person`` has not rated by similarity-weighted rating.

    Every other user whose similarity to ``person`` is positive contributes
    ``rating * similarity`` for each item that ``person`` has not rated.
    The predicted score of an item is the sum of those contributions
    divided by the sum of the similarities of the users who rated it.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person: Key in ``prefs`` to produce recommendations for.
        sim_algorithm: Callable ``(prefs, person, other) -> score`` used to
            weight each other user's ratings.

    Returns:
        A list of ``(predicted_score, item)`` tuples sorted in descending
        order. Empty when no user with a positive similarity has rated an
        item that ``person`` has not.
    """
    totals = {}
    sim_sums = {}
    for user, ratings in prefs.items():
        if user == person:
            continue
        sim = sim_algorithm(prefs, person, user)
        # Users with zero or negative similarity do not contribute.
        if sim <= 0:
            continue
        for item, rating in ratings.items():
            # Only score items the person has not rated yet; a stored
            # rating of 0 is treated the same as a missing one.
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            # Similarity * rating: the closer a user is to the person,
            # the more that user's rating counts.
            totals[item] = totals.get(item, 0) + rating * sim
            # Sum of similarities, used to normalise the total.
            sim_sums[item] = sim_sums.get(item, 0) + sim

    rankings = sorted(
        ((total / sim_sums[item], item) for item, total in totals.items()),
        reverse=True,
    )
    return rankings
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment