Skip to content

Instantly share code, notes, and snippets.

@dengshilong
Created March 19, 2013 03:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dengshilong/5193515 to your computer and use it in GitHub Desktop.
Save dengshilong/5193515 to your computer and use it in GitHub Desktop.
import math
from operator import itemgetter, attrgetter
from collections import defaultdict
def UserSimilarity(train):
    """Compute user-user similarity from the items users have in common.

    The similarity of two distinct users u and v is

        W[u][v] = c(u, v) / sqrt(N[u] * N[v])

    where c(u, v) is the number of items that appear under both users and
    N[x] is the number of items that appear under user x.

    Args:
        train: mapping ``user -> {item: rating}``. Only the item keys are
            read; the rating values are not used.

    Returns:
        A ``defaultdict(dict)`` W with an entry W[u][v] for every ordered
        pair of distinct users sharing at least one item. Users that share
        no item with anybody do not appear as keys.
    """
    # build inverse table for item_users
    print("build inverse table for item_users")
    item_users = defaultdict(set)
    for u, items in train.items():
        for i in items:
            item_users[i].add(u)

    # calculate co-rated items between users
    print('calculate co-rated items between users')
    # C[u][v]: number of items shared by u and v; N[u]: number of items of u.
    C = defaultdict(lambda: defaultdict(int))
    N = defaultdict(int)
    for users in item_users.values():
        for u in users:
            N[u] += 1
            for v in users:
                if u == v:
                    continue
                C[u][v] += 1

    # calculate final similarity matrix W
    print('calculate finial similarity matrix W')
    W = defaultdict(dict)
    for u, related_users in C.items():
        for v, cuv in related_users.items():
            # math.sqrt returns a float, so this is true division on both
            # Python 2 and Python 3.
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def LoadRating(trainFile):
    """Load a tab-separated rating file into a nested mapping.

    Each non-blank line must hold at least three tab-separated integer
    fields: user id, item id and rating. Any further fields on the line
    are ignored.

    Args:
        trainFile: path of the rating file to read.

    Returns:
        A ``defaultdict(dict)`` such that ``train[uid][iid] == rating``.
        If the same (uid, iid) pair occurs more than once, the last
        occurrence wins.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if one of the first three fields is not an integer.
        IndexError: if a non-blank line has fewer than three fields.
    """
    print('loading rating')
    train = defaultdict(dict)
    with open(trainFile, 'r') as f:
        for line in f:
            # Skip blank lines (e.g. an empty line at the end of the file)
            # instead of failing on int('').
            if not line.strip():
                continue
            values = line.split('\t')
            uid = int(values[0])
            iid = int(values[1])
            # int() tolerates surrounding whitespace, including the newline
            # left on the last field when the line has only three fields.
            rating = int(values[2])
            train[uid][iid] = rating
    return train
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment