Last active
June 30, 2021 17:27
-
-
Save alexcpn/1f187f2114976e748f4d3ad38dea17e8 to your computer and use it in GitHub Desktop.
Creating a KD tree and algorithm for finding nearest neighbour
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from operator import itemgetter | |
import matplotlib.pyplot as plt | |
from mpl_toolkits.axes_grid1 import host_subplot | |
fig =plt.figure(figsize=(10, 8), dpi=80) | |
ax = plt.gca() | |
def parse_tree(node,prev_node=None,label='Root'): | |
# Helper utility to display the KD tree graphically to understand the spatial split | |
# recursively parses the KD tree | |
if node is None: | |
return | |
ax.plot(node.cell[0],node.cell[1],'ro') | |
ax.text(node.cell[0],node.cell[1],label + ' ' + str(node.cell[2])) | |
if prev_node: | |
ax.plot([node.cell[0],prev_node.cell[0]],[node.cell[1],prev_node.cell[1]]) | |
parse_tree(node.left_branch,node,label='L') | |
parse_tree(node.right_branch,node,label='R') | |
def find_nearest_neighbour(node,point,distance_fn,current_axis): | |
# Algorith to find nearest neighbour in a KD Tree;the KD tree has done a spatial sort | |
# of the given co-ordinates, such that to the left of the root lies co-ordinates nearest to the x-axis | |
# and to the right of the root ,lies the co-ordinates farthest from the x axis | |
# On the y axis split on the left of the parent/root node lies co-ordinates nearest to the y-axis and to | |
# the right of the root, lies the co-ordinates farthest from the y axis | |
# to find the nearest neightbour, from the root, you first check left and right node; if distance is closer | |
# to the right node,then the entire left node can be discarded from search, because of the spatial split | |
# and that node becomes the root node. This process is continued recursively till the nearest is found | |
# param:node: The current node | |
# param: point: The point to which the nearest neighbour is to be found | |
# param: distance_fn: to calculate the nearest neighbour | |
# param: current_axis: here assuming only two dimenstion and current axis will be either x or y , 0 or 1 | |
if node is None: | |
return None,None | |
current_closest_node = node | |
closest_known_distance = distance_fn(node.cell[0],node.cell[1],point[0],point[1]) | |
print closest_known_distance,node.cell | |
x = (node.cell[0],node.cell[1]) | |
y = point | |
new_node = None | |
new_closest_distance = None | |
if x[current_axis] > y[current_axis]: | |
new_node,new_closest_distance= find_nearest_neighbour(node.left_branch,point,distance_fn, | |
(current_axis+1) %2) | |
else: | |
new_node,new_closest_distance = find_nearest_neighbour(node.right_branch,point,distance_fn, | |
(current_axis+1) %2) | |
if new_closest_distance and new_closest_distance < closest_known_distance: | |
print 'Reset closest node to ',new_node.cell | |
closest_known_distance = new_closest_distance | |
current_closest_node = new_node | |
return current_closest_node,closest_known_distance | |
class Node(namedtuple('Node','cell, left_branch, right_branch')): | |
# This Class is taken from wikipedia code snippet for KD tree | |
pass | |
def create_kdtree(cell_list,current_axis,no_of_axis): | |
# Creates a KD Tree recursively following the snippet from wikipedia for KD tree | |
# but making it generic for any number of axis and changes in data strucure | |
if not cell_list: | |
return | |
# sget the cell as a tuple list | |
k= [(cell[0],cell[1],cell[2]) for cell in cell_list] | |
k.sort(key=itemgetter(current_axis)) # sort on the current axis | |
median = len(k) // 2 # get the median of the list | |
axis = (current_axis + 1) % no_of_axis # cycle the axis | |
return Node(k[median], # recurse | |
create_kdtree(k[:median],axis,no_of_axis), | |
create_kdtree(k[median+1:],axis,no_of_axis)) | |
#point_list = [(13.04454, 77.621003,1), (13.047443, 77.61785,2), (13.048644, 77.619951,3), | |
# (13.049337, 77.620301,4), (13.051384, 77.618898,5)] | |
cell_list = create_random_points_around_loc(10, 13.049763315394207 , 77.619163323666285,.200) | |
print cell_list | |
tree = create_kdtree(cell_list,0,2) | |
parse_tree(tree) | |
plt.plot(13.04454, 77.621003,'bo') | |
plt.show() | |
node,distance = find_nearest_neighbour(tree,(13.04454, 77.621003),haversine,0) | |
print 'Nearest Neighbour=',node.cell,distance |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import numpy as np | |
RADIUS_OF_EARTH_IN_KM = 6372.8 | |
def haversine(lat1, lon1, lat2, lon2): | |
""" | |
Utility to calcutlate distance between two pointtodo explain regarding height | |
coverting from geodisc co-ordinate to cartestian gives errors when distances are further apart | |
""" | |
dLat = np.radians(lat2 - lat1) | |
dLon = np.radians(lon2 - lon1) | |
lat1 = np.radians(lat1) | |
lat2 = np.radians(lat2) | |
a = np.sin(dLat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dLon/2)**2 | |
c = 2*np.arcsin(np.sqrt(a)) | |
return RADIUS_OF_EARTH_IN_KM * c | |
def create_random_point(x0,y0,distance): | |
""" | |
Utility method for simulation of the points | |
""" | |
r = distance/ 111300 | |
u = np.random.uniform(0,1) | |
v = np.random.uniform(0,1) | |
w = r * np.sqrt(u) | |
t = 2 * np.pi * v | |
x = w * np.cos(t) | |
x1 = x / np.cos(y0) | |
y = w * np.sin(t) | |
return (x0+x1, y0 +y) | |
def create_random_points_around_loc(max_elements,lat1,long1,distance_in_meter): | |
list_of_points= list(tuple()) | |
for x in range(0,max_elements): | |
latitude2,longitude2 =create_random_point(lat1,long1,distance_in_meter*1000) | |
list_of_points.append((latitude2,longitude2,x)) | |
return list_of_points | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment