Skip to content

Instantly share code, notes, and snippets.

@uppfinnarjohnny
Forked from archie/playcount.py
Created April 20, 2012 05:54
Show Gist options
  • Save uppfinnarjohnny/2426437 to your computer and use it in GitHub Desktop.
Save uppfinnarjohnny/2426437 to your computer and use it in GitHub Desktop.
Playcount.py
import sys
import random
import math
from collections import defaultdict
# Require exactly two command-line arguments: the file to parse and the
# name used as the prefix of the two output files.
if len(sys.argv) != 3:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Usage: ./playcount.py <file_to_parse> <output_counts_to_file>")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and is missing under `python -S` or frozen builds.
    sys.exit(1)
countmap = defaultdict(int)
# Count occurrences per (user, video) pair.
with open(sys.argv[1], 'r') as handle:
    # Iterate the file object directly so lines are streamed instead of
    # being loaded all at once by readlines().
    for line in handle:
        # The previous slice [1:2] kept a single field, which cannot be
        # unpacked into the (user, video) pair expected when the counts are
        # written out. Take two fields instead.
        # NOTE(review): assumes whitespace-separated columns 1 and 2 hold
        # the user and the video -- confirm against the input format.
        key = tuple(line.split()[1:3])
        if len(key) != 2:
            # Blank or truncated line: there is no complete pair to count.
            continue
        countmap[key] += 1
# Create a testing set from a random quarter of the counted keys (rounded
# up) and let whatever remains in countmap be the training set.
test_size = int(math.ceil(len(countmap) * 0.25))
# random.sample requires a sequence on Python 3, so sample from a list of
# the keys. That list is fully built before the first pop, so countmap is
# never mutated while it is being iterated.
samplemap = dict(
    (pair, countmap.pop(pair))
    for pair in random.sample(list(countmap), test_size)
)
# store everything
def writefile(filename, data):
    """Write one "<user> <video> <count>" line per entry of data.

    filename -- path of the file to create (an existing file is overwritten).
    data     -- mapping from (user, video) tuples to integer counts.
    """
    with open(filename, 'w') as outhandle:
        # items() exists on both Python 2 and Python 3 dicts, unlike
        # iteritems().
        for (user, video), value in data.items():
            # Emit the identifiers exactly as they appear in the keys. The
            # previous version looked them up in useridmap / videoidmap,
            # which are not defined anywhere in this script and raised
            # NameError on the first write.
            outhandle.write("%s %s %d\n" % (user, video, value))
# Store both sets next to each other: the second command-line argument is
# used as a shared prefix, with "-test" and "-train" appended.
output_prefix = sys.argv[2]
writefile("%s-test" % output_prefix, samplemap)
writefile("%s-train" % output_prefix, countmap)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment