Skip to content

Instantly share code, notes, and snippets.

@cjdd3b
Last active December 20, 2015 12:49
Show Gist options
  • Save cjdd3b/6133658 to your computer and use it in GitHub Desktop.
Save cjdd3b/6133658 to your computer and use it in GitHub Desktop.
import numpy
def get_similar(vec, matrix, K=10):
    """Return the indices of the rows of ``matrix`` nearest to ``vec``.

    Nearness is Euclidean distance. Indices are returned closest-first;
    rows at equal distance keep their original relative order.

    Args:
        vec: 1-D sequence of numbers (the query point).
        matrix: 2-D sequence of numbers, one candidate point per row, each
            row having the same length as ``vec``. An empty sequence is
            accepted and yields an empty result.
        K: Maximum number of neighbours to return. Values larger than the
            number of rows are clamped to it; values below zero are
            treated as zero.

    Returns:
        A list of at most ``K`` plain ``int`` row indices into ``matrix``.

    Raises:
        ValueError: If ``matrix`` is not 2-D or its row length differs
            from the length of ``vec``.
    """
    # Work in float so unsigned/small integer inputs cannot wrap around
    # when differences are taken or squared.
    query = numpy.asarray(vec, dtype=float).ravel()
    data = numpy.asarray(matrix, dtype=float)

    # No candidates at all: nothing can be similar.
    if data.ndim == 1 and data.size == 0:
        return []
    if data.ndim != 2:
        raise ValueError("matrix must be a 2-D sequence of rows")

    n_rows, n_dims = data.shape
    # Check explicitly: a length-1 query would otherwise broadcast silently
    # against rows of any length and produce meaningless distances.
    if query.shape[0] != n_dims:
        raise ValueError(
            "vec has %d dimensions but matrix rows have %d"
            % (query.shape[0], n_dims)
        )

    # You can't get more neighbours than there are rows, nor fewer than zero
    # (a negative slice bound would silently drop items from the far end).
    k = max(0, min(K, n_rows))

    # Squared distance ranks identically to the distance itself, so the
    # square root is skipped.
    squared_distances = ((data - query) ** 2).sum(axis=1)

    # mergesort is a stable sort, making the order of tied rows deterministic.
    order = numpy.argsort(squared_distances, kind="mergesort")
    return order[:k].tolist()
if __name__ == '__main__':
    # Small demo: rank three candidate rows by closeness to the query vector.
    vec = [1, 0, 0, 0, 0, 1]
    matrix = [
        [1, 0, 0, 0, 1, 1],
        [0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 1],
    ]
    # Call-style print with a single argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print(get_similar(vec, matrix, 3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment