Skip to content

Instantly share code, notes, and snippets.

@rajarsheem
Last active November 17, 2015 08:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rajarsheem/96c03a17197d656c0905 to your computer and use it in GitHub Desktop.
Save rajarsheem/96c03a17197d656c0905 to your computer and use it in GitHub Desktop.
baseball economics politics Europe Asia soccer war security shopping family num-attr User 1 User 2
doc1 1 0 1 0 1 1 0 0 0 1 5 1 -1
doc2 0 1 1 1 0 0 0 1 0 0 4 -1 1
doc3 0 0 0 1 1 1 0 0 0 0 3
doc4 0 0 1 1 0 0 1 1 0 0 4 1
doc5 0 1 0 0 0 0 0 0 1 1 3
doc6 1 0 0 1 0 0 0 0 0 0 2 1
doc7 0 0 0 0 0 0 0 1 0 1 2
doc8 0 0 1 1 0 0 1 0 0 1 4
doc9 0 0 0 0 0 1 0 0 1 0 2
doc10 0 1 0 0 1 0 1 0 0 0 3
doc11 0 0 1 0 1 0 0 0 1 0 3
doc12 1 0 0 0 0 1 1 0 0 0 3 -1
doc13 0 0 1 1 1 0 0 1 0 0 4
doc14 0 1 1 1 0 0 0 0 1 0 4
doc15 0 0 0 1 0 1 1 1 0 0 4
doc16 1 0 0 0 0 1 0 0 1 0 3 1
doc17 0 1 1 1 0 0 0 1 0 0 4 1
doc18 0 0 0 1 0 0 0 0 1 0 2
doc19 0 1 1 0 1 0 1 0 0 1 5 -1
doc20 0 0 1 1 0 0 1 0 1 0 4
DF 4 6 10 11 6 6 7 6 7 5
import csv
import numpy as np
import math
def col(i, lis):
    """Return element ``i`` of every row in ``lis`` except the last row.

    Every row, including the final one, is indexed before the last value
    is discarded, so each row must have an element at position ``i``.
    """
    values = []
    for row in lis:
        values.append(row[i])
    return values[:-1]
def predict(l, n, d, idf=1):
    """Print content-based recommendation scores for two users.

    For each user a profile is built with one entry per attribute column:
    the dot product of that attribute column with the user's rating column.
    Every document is then scored as the dot product of the profile with the
    document's attribute values, optionally weighted per attribute by ``idf``.

    Args:
        l: Table rows laid out as ``[label, attr_1 .. attr_k, <one column
            not read here>, user-1 rating, user-2 rating]``. The last row is
            excluded from both the profiles and the document scores.
        n: Unused; kept so existing callers keep working.
        d: Number of columns in a row. Attributes are columns ``1 .. d-4``
            and the two rating columns are ``d-2`` and ``d-1``.
        idf: The scalar ``1`` for unweighted scores, or a sequence holding
            one weight per attribute.

    Returns:
        None. Profiles, scores and the summary lines are printed.

    Raises:
        IndexError: If there are fewer than nine documents (the doc9 line
            below is hard-coded).
        ValueError: If there are no documents to score.
    """
    docs = l[:-1]

    # The rating columns do not depend on the attribute being processed.
    ratings1 = col(d - 2, l)
    ratings2 = col(d - 1, l)

    profile1, profile2 = [], []
    for i in range(1, d - 3):
        c = col(i, l)
        profile1.append(np.dot(c, ratings1))
        profile2.append(np.dot(c, ratings2))
    print("profiles")
    print(profile1)
    print(profile2)

    # Compare by value: ``idf is 1`` relied on CPython's small-int caching
    # and triggers a SyntaxWarning on Python 3.8+.
    if isinstance(idf, (int, float)) and idf == 1:
        docscore1 = [np.dot(profile1, r[1:-3]) for r in docs]
        docscore2 = [np.dot(profile2, r[1:-3]) for r in docs]
    else:
        print("debug", len(idf), len(profile1))
        docscore1 = [
            sum(x * y * z for x, y, z in zip(profile1, r[1:-3], idf))
            for r in docs
        ]
        docscore2 = [
            sum(x * y * z for x, y, z in zip(profile2, r[1:-3], idf))
            for r in docs
        ]
    print("doc scores")
    print(docscore1)
    print(docscore2)

    # Rank document positions instead of looking scores up by value, so that
    # tied top scores yield two different documents. ``sorted`` is stable even
    # with ``reverse=True``, so among equal scores the earliest document wins,
    # matching ``list.index(max(...))``.
    ranking1 = sorted(
        range(len(docscore1)), key=docscore1.__getitem__, reverse=True
    )
    best1, second1 = ranking1[0], ranking1[1]
    best2 = docscore2.index(max(docscore2))

    print("user1 will like document {d} (score = {s}) best".format(d=best1 + 1, s=docscore1[best1]))
    print("user1 will like document {d} (score = {s}) 2nd best".format(d=second1 + 1, s=docscore1[second1]))
    # Hard-coded question about the ninth document; needs at least nine rows.
    print("user1's prediction for doc9 is", docscore1[8])
    print("user2 will like document {d} (score = {s}) best".format(d=best2 + 1, s=docscore2[best2]))
    print("No. of docs user2 will dislike is ", sum(1 for x in docscore2 if x < 0))
# Load the table. newline='' is what the csv module documentation asks for,
# so the reader handles line endings and quoted newlines itself.
with open('ml-latest-small/ass.csv', newline='') as f:
    data = csv.reader(f, delimiter=',')
    next(data)  # skip the header row
    l = list(data)
n = len(l)
d = len(l[0])

# Convert every cell after the label to int, treating blanks as 0. Rows that
# are shorter than the first row (e.g. exported without trailing delimiters)
# are padded with zeros so the negative-index slices used below line up.
for row in l:
    row[1:d] = [int(cell) if cell != '' else 0 for cell in row[1:d]]
    row.extend([0] * (d - len(row)))

# Pass 1: raw attribute values.
predict(l, n, d)

# Pass 2: divide each document's attribute values by the square root of its
# third-from-last column (presumably the attribute count, "num-attr").
# A zero count is skipped to avoid dividing by zero.
for r in l[:-1]:
    norm = math.sqrt(r[-3])
    if norm:
        r[1:-3] = [x / norm for x in r[1:-3]]
predict(l, n, d)

# Pass 3: additionally weight each attribute by the reciprocal of the value
# in the last row (presumably the document-frequency row); a zero entry gets
# weight 0 instead of raising ZeroDivisionError.
idf = [1 / x if x else 0 for x in l[-1][1:-3]]
predict(l, n, d, idf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment