Skip to content

Instantly share code, notes, and snippets.

@takuti
Last active January 20, 2016 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takuti/622ee8c3d786d292ce6d to your computer and use it in GitHub Desktop.
Save takuti/622ee8c3d786d292ce6d to your computer and use it in GitHub Desktop.
Collaborative Filtering for row items in a given N-by-M matrix
import numpy as np
class CF:
    """Collaborative filtering for row items in a given N-by-M matrix.

    Every row of the matrix is treated as one item. ``run`` fills the
    symmetric N-by-N matrix ``sims_mat`` with pairwise row similarities and
    ``top_n`` ranks, for each row, the rows most similar to it.
    """

    def __init__(self, mat):
        """Keep a reference to ``mat`` and record its dimensions.

        mat -- 2D numpy array with N rows and M columns.
        """
        self.mat = mat
        self.N, self.M = mat.shape

    def similarity(self, v1, v2, func='cosine'):
        """Compute similarity between v1 and v2.

        v1 and v2 are vectors (1D numpy array).
        func selects the measure: 'cosine' (default) or 'euclidean', the
        latter being 1 / (1 + Euclidean distance).

        Returns a float. Element-wise identical vectors always score 1.0;
        under 'cosine' a zero-norm vector otherwise scores 0.0.
        Raises ValueError if func is not one of the supported names.
        """
        if func not in ('cosine', 'euclidean'):
            # Fail loudly instead of silently returning None.
            raise ValueError(
                "unknown similarity function: {!r}".format(func))

        if np.array_equal(v1, v2):
            return 1.

        # Work in floating point so that integer inputs neither overflow
        # when squared nor trigger integer division.
        v1 = np.asarray(v1, dtype=float)
        v2 = np.asarray(v2, dtype=float)

        if func == 'cosine':
            v1_norm = np.sqrt(np.dot(v1, v1))
            v2_norm = np.sqrt(np.dot(v2, v2))
            if v1_norm == 0. or v2_norm == 0.:
                # Cosine is undefined for a zero vector; report "no similarity".
                return 0.
            return float(np.dot(v1, v2) / (v1_norm * v2_norm))

        # func == 'euclidean'
        diff = v1 - v2
        return float(1. / (1. + np.sqrt(np.dot(diff, diff))))

    def run(self):
        """Compute the pairwise (cosine) similarity of all rows.

        Stores the result in ``self.sims_mat``, a symmetric N-by-N array.
        """
        sims = np.zeros((self.N, self.N))
        for i in range(self.N):
            # The measure is symmetric: compute the upper triangle only and
            # mirror each value into the lower triangle.
            for j in range(i, self.N):
                sim = self.similarity(self.mat[i, :], self.mat[j, :])
                sims[i, j] = sims[j, i] = sim
        self.sims_mat = sims

    def top_n(self, n):
        """Return Top-N similar items' indices for each row.

        Calls ``run`` first if ``sims_mat`` has not been computed yet.

        Returns a tuple ``(tops_idx, tops_sim)`` of 2D arrays with one row
        per item and at most n columns, ordered by descending similarity:
        ``tops_idx`` holds row indices and ``tops_sim`` the matching
        similarity values. Each item's own index is included, since its
        similarity to itself is 1.0.
        """
        if not hasattr(self, 'sims_mat'):
            self.run()

        sims = self.sims_mat
        # Sort every row at once (ascending), reverse to descending and keep
        # the first n columns; avoids re-copying the result for every row.
        tops_idx = np.argsort(sims, axis=1)[:, ::-1][:, :n]
        # Pick the similarity values that belong to the selected indices.
        row_ids = np.arange(sims.shape[0])[:, np.newaxis]
        tops_sim = sims[row_ids, tops_idx]
        return tops_idx, tops_sim
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment