Created
March 24, 2017 05:08
-
-
Save spencebeecher/fc53a12dd9a6b313b5aace697aa1e356 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# In[1]:

import pysparnn as snn

# In[2]:

import pysparnn.matrix_distance

# In[3]:

import sklearn
import sklearn.metrics.pairwise
import scipy.sparse as sparse
import numpy as np
class UserCustomDistance(pysparnn.matrix_distance.MatrixMetricSearch):
    """Overlap-based distance evaluated pair by pair through scikit-learn.

    The distance between two rows ``u`` and ``v`` is
    ``max_overlap - sum(minimum(u, v))``.  ``_distance`` hands
    ``user_distance_metric`` to
    ``sklearn.metrics.pairwise.pairwise_distances`` as a callable metric.
    """

    def __init__(self, features, records_data):
        """Initialise the base search object and derive the overlap ceiling.

        Args:
            features: Matrix with one row per record.  Rows are densified
                with ``.toarray()`` by the metric, so SciPy sparse input is
                expected.
            records_data: Per-row payload, forwarded to the base class.
        """
        super(UserCustomDistance, self).__init__(features, records_data)
        # The base constructor is expected to have set ``self.matrix``.
        # The row count is used as the constant the overlap is subtracted
        # from (the author marked this choice "for testing purpose").
        self.max_overlap = self.matrix.shape[0]

    @staticmethod
    def features_to_matrix(features):
        """Return ``features`` unchanged; no conversion is applied."""
        return features

    @staticmethod
    def vstack(matrix_list):
        """Stack feature matrices vertically.

        ``np.vstack`` does not understand SciPy sparse matrices (it wraps
        each one in an object array), so sparse blocks are stacked with
        ``scipy.sparse.vstack`` instead.  Dense input keeps using
        ``np.vstack``.
        """
        blocks = list(matrix_list)
        if any(sparse.issparse(block) for block in blocks):
            return sparse.vstack(blocks, format="csr")
        return np.vstack(blocks)

    def _transform_value(self, v):
        """Return the raw distance value ``v`` without modification."""
        return v

    def user_distance_metric(self, u, v):
        """Return ``max_overlap`` minus the summed element-wise minimum.

        Args:
            u: Sparse row (anything exposing ``.toarray()``).
            v: Sparse row with the same shape as ``u``.
        """
        overlap = np.minimum(u.toarray(), v.toarray()).sum()
        return self.max_overlap - overlap

    def _distance(self, a_matrix):
        """Compute distances from each row of ``a_matrix`` to each indexed row.

        Delegates to scikit-learn's ``pairwise_distances`` with
        ``user_distance_metric`` as the callable metric.
        """
        return sklearn.metrics.pairwise.pairwise_distances(
            a_matrix, self.matrix, metric=self.user_distance_metric)
# In[4]: | |
class SpencerUserCustomDistance(pysparnn.matrix_distance.MatrixMetricSearch):
    """Overlap-style distance computed with matrix algebra.

    ``_distance`` replaces the per-pair callable with a single
    ``row sums - dot product`` expression over the whole matrix.
    """

    def __init__(self, features, records_data):
        """Initialise the base search object and derive the overlap ceiling.

        Args:
            features: Matrix with one row per record.
            records_data: Per-row payload, forwarded to the base class.
        """
        super(SpencerUserCustomDistance, self).__init__(features, records_data)
        # The base constructor is expected to have set ``self.matrix``.
        # Row count kept as the overlap ceiling used by
        # ``user_distance_metric`` (marked "for testing purpose" by the
        # author).
        self.max_overlap = self.matrix.shape[0]

    @staticmethod
    def features_to_matrix(features):
        """Return ``features`` unchanged; no conversion is applied."""
        return features

    @staticmethod
    def vstack(matrix_list):
        """Stack feature matrices vertically.

        ``np.vstack`` does not understand SciPy sparse matrices (it wraps
        each one in an object array), so sparse blocks are stacked with
        ``scipy.sparse.vstack`` instead.  Dense input keeps using
        ``np.vstack``.
        """
        blocks = list(matrix_list)
        if any(sparse.issparse(block) for block in blocks):
            return sparse.vstack(blocks, format="csr")
        return np.vstack(blocks)

    def _transform_value(self, v):
        """Map a positive value to ``1.0`` and anything else to ``0.0``."""
        return 1.0 if v > 0 else 0.0

    def user_distance_metric(self, u, v):
        """Return ``max_overlap`` minus the summed element-wise minimum.

        Not called by ``_distance`` in this class.

        Args:
            u: Sparse row (anything exposing ``.toarray()``).
            v: Sparse row with the same shape as ``u``.
        """
        overlap = np.minimum(u.toarray(), v.toarray()).sum()
        return self.max_overlap - overlap

    def _distance(self, a_matrix):
        """Return indexed-row sums minus the query/record dot products.

        Args:
            a_matrix: Query matrix whose rows are compared with
                ``self.matrix``.
        """
        # Per-row totals of the indexed matrix.
        row_totals = self.matrix.sum(axis=1)
        # Dot products, transposed so each row corresponds to a row of
        # ``a_matrix``.
        shared = self.matrix.dot(a_matrix.transpose()).transpose()
        # NOTE(review): ``row_totals`` is indexed by the rows of
        # ``self.matrix`` while the rows of ``shared`` follow ``a_matrix``;
        # the subtraction relies on broadcasting and looks like it only
        # lines up when both matrices have the same number of rows --
        # confirm the intended orientation.
        return np.array(row_totals - shared)
# In[5]: | |
# All-ones 3x3 reference matrix.
B = sparse.csr_matrix([[1, 1, 1], [1, 1, 1], [1, 1, 1]])

# Small 3x3 fixture containing a zero entry and values greater than one.
A = sparse.csr_matrix(
    [
        [2, 2, 1],
        [1, 0, 1],
        [1, 1, 1],
    ])
# In[6]: | |
# Distances of A against itself via the scikit-learn callable metric.
u_dist = UserCustomDistance(A, range(A.shape[0]))
u_dist._distance(A)

# In[7]:

# Same query through the matrix-algebra implementation, for comparison.
su_dist = SpencerUserCustomDistance(A, range(A.shape[0]))
su_dist._distance(A)
# In[8]: | |
import scipy

# In[10]:

from scipy.sparse import rand

# In[11]:

# Random square CSR matrix: 1,000,000 x 1,000,000 at density 2e-5, which
# works out to roughly 20 stored entries per row.
num_records = 1000000
matrix = rand(num_records, num_records, density=0.00002, format='csr')

# In[12]:

# Average of the per-row sums of the random matrix.
matrix.sum(axis=1).mean()
# In[13]: | |
import pysparnn.cluster_index as ci

# In[ ]:

import time

# In[ ]:

# Time the construction of the cluster index over the random matrix, using
# the matrix-algebra distance and the row numbers as record payloads.
t0 = time.time()
cp2 = ci.MultiClusterIndex(
    matrix, np.arange(num_records), distance_type=SpencerUserCustomDistance)
t1 = time.time()

# In[ ]:

# Elapsed wall-clock seconds for the index build.
t1 - t0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment