Skip to content

Instantly share code, notes, and snippets.

/item_mean.py
Created Jun 13, 2014

Embed
What would you like to do?
import sys
class ItemMeanData(object):
    """Item-mean baseline predictor built from per-item rating statistics.

    Accumulates a global rating sum/count and per-item rating sums/counts
    from a training file, and predicts each item's score as the item's
    mean rating (global mean plus the item's offset from it).
    """

    def __init__(self):
        # Running totals over every rating seen by train().
        self.global_sum = 0.0
        self.global_count = 0
        # item id -> sum of ratings / number of ratings for that item.
        self.item_sums = {}
        self.item_counts = {}

    def train(self, trainfile):
        """Accumulate rating statistics from the file at *trainfile*.

        Each non-blank line must be comma-separated with at least three
        fields: user, item (integer) and rating (float). Extra fields are
        ignored. Blank lines are skipped. Calling this more than once
        adds to the totals already collected.

        Raises:
            ValueError: if a non-blank line has fewer than three fields,
                or its item/rating fields cannot be parsed.
        """
        with open(trainfile) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines instead of failing on unpacking.
                    continue
                _user, item, rating = line.split(',')[:3]
                item = int(item)
                rating = float(rating)
                self.global_sum += rating
                self.global_count += 1
                self.item_sums[item] = self.item_sums.get(item, 0.0) + rating
                self.item_counts[item] = self.item_counts.get(item, 0) + 1

    def global_mean(self):
        """Return the mean of all ratings seen so far.

        Raises:
            ZeroDivisionError: if no ratings have been accumulated.
        """
        return self.global_sum / self.global_count

    def item_set(self):
        """Return a new set holding every item id that has a rating."""
        return set(self.item_counts)

    def item_mean_offsets(self):
        """Return ``(global_mean, offsets)``.

        ``offsets`` maps each rated item id to its mean rating minus the
        global mean.
        """
        gmean = self.global_mean()
        # dict.items() works on both Python 2 and 3 (iteritems is 2-only).
        means = dict(
            (item, self.item_sums[item] / n - gmean)
            for item, n in self.item_counts.items()
        )
        return gmean, means

    def score_items(self, to_score, output):
        """Write one ``user,item,score`` line per requested pair to *output*.

        Args:
            to_score: mapping of user -> iterable of item ids to score.
            output: object with a ``write`` method (e.g. ``sys.stdout``).

        Items without training data are scored with the global mean alone.
        Scores are formatted with three decimal places.
        """
        global_mean, item_means = self.item_mean_offsets()
        for user, items in to_score.items():
            for item in items:
                pred = global_mean + item_means.get(item, 0.0)
                # Explicit write() replaces the Python-2-only
                # ``print >> output`` form and emits the same text.
                output.write("%s,%s,%.3f\n" % (user, item, pred))
def load_query_users(userfile, items):
    """Build the scoring request for every user listed in *userfile*.

    Args:
        userfile: path to a text file with one integer user id per line.
            Blank lines are skipped.
        items: collection of item ids to score for each user.

    Returns:
        dict mapping each user id to *items*. Every user refers to the
        same ``items`` object (no per-user copy is made), so mutating it
        afterwards affects all entries.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    to_score = {}
    with open(userfile) as userf:
        for line in userf:
            line = line.strip()
            if not line:
                # A stray empty line should not abort the whole load.
                continue
            to_score[int(line)] = items
    return to_score
# Read the command line arguments. Exactly three arguments are expected
# after the script name; the second and third are the user file and the
# training file (the first is not used by this script).
if len(sys.argv) == 4:
    userfile, trainfile = sys.argv[2:4]
else:
    # sys.stderr.write behaves the same on Python 2 and 3.
    sys.stderr.write("Invalid Arguments.\n")
    sys.exit(1)

# Train the model using the training file.
model = ItemMeanData()
model.train(trainfile)

# Score every trained item for each user listed in the user file.
# An empty user-file argument is reported rather than passed to open().
if userfile:
    to_score = load_query_users(userfile, model.item_set())
    model.score_items(to_score, sys.stdout)
else:
    sys.stderr.write("no user file specified\n")
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.