Skip to content

Instantly share code, notes, and snippets.

@xiejuncs
Last active March 14, 2024 15:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xiejuncs/6f2694564263907dd09743f61d5377a9 to your computer and use it in GitHub Desktop.
Save xiejuncs/6f2694564263907dd09743f61d5377a9 to your computer and use it in GitHub Desktop.
KNN simple Python code example
def get_vector(dimension, multiple):
    """Return a vector whose components all equal ``multiple * 0.000001``.

    ``multiple`` controls how far the vector lies from the origin (the
    query vector in this example): the larger its magnitude, the larger
    the distance.

    Args:
        dimension: Number of components in the returned vector.
        multiple: Scale factor applied to the base component value 0.000001.

    Returns:
        A new list of ``dimension`` identical floats.
    """
    # The component value does not depend on the index, so compute it once.
    component = multiple * 0.000001
    return [component] * dimension
def get_all_zero_vector(dimension):
    """Return the origin: a new list of ``dimension`` integer zeros.

    Args:
        dimension: Number of components in the returned vector.

    Returns:
        A list containing ``dimension`` copies of ``0``.
    """
    return [0] * dimension
def l2_squared_distance(first, second):
    """Return the squared Euclidean (L2) distance between two vectors.

    The square root is deliberately not taken: squared distances order
    vectors the same way as true distances, which is all a nearest-neighbour
    ranking needs.

    Args:
        first: Sequence of numbers.
        second: Sequence of numbers with the same length as ``first``.

    Returns:
        The sum of squared component-wise differences, as a float
        (``0.0`` for two empty vectors).

    Raises:
        ValueError: If the two vectors differ in length.
    """
    # An explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let a length mismatch go unnoticed.
    if len(first) != len(second):
        raise ValueError(
            "vectors must have the same length: "
            "{} != {}".format(len(first), len(second))
        )
    distance = 0.0
    for a, b in zip(first, second):
        diff = a - b
        distance += diff * diff
    return distance
def knn(num_of_vectors, dimension, num_of_items):
    """Return the ``num_of_items`` synthetic vectors nearest to the origin.

    A data set of ``num_of_vectors`` vectors is generated, where vector ``i``
    is ``get_vector(dimension, i)``. Each is scored by its squared L2
    distance to the all-zero query vector.

    Args:
        num_of_vectors: Number of synthetic vectors to generate (ids
            ``0 .. num_of_vectors - 1``).
        dimension: Number of components in every vector.
        num_of_items: How many nearest neighbours to return. A value of
            zero or less yields an empty list.

    Returns:
        A list of ``(id, squared_distance)`` tuples ordered by ascending
        distance; ties keep ascending id order. It holds at most
        ``num_of_items`` entries.
    """
    # Imported locally because the file has no top-level import block.
    import heapq

    # Plain lists are used instead of numpy arrays for illustration purposes.
    query_vector = get_all_zero_vector(dimension)

    # Only the id and its distance are kept; the vector itself is discarded
    # once it has been scored.
    scored = (
        (vector_id,
         l2_squared_distance(query_vector, get_vector(dimension, vector_id)))
        for vector_id in range(num_of_vectors)
    )

    # nsmallest is equivalent to sorted(scored, key=...)[:num_of_items] but
    # avoids sorting the entire data set when only a few items are wanted.
    return heapq.nsmallest(num_of_items, scored, key=lambda entry: entry[1])
if __name__ == '__main__':
    # Demo: rank 1000 synthetic 128-dimensional vectors against the origin
    # and print the 10 nearest as "id: ..., distance: ..." lines.
    res = knn(1000, 128, 10)
    formatted_text = "id: {id}, distance: {distance}"
    for item_id, distance in res:
        print(formatted_text.format(id=item_id, distance=distance))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment