Skip to content

Instantly share code, notes, and snippets.

@alexcpn
Last active June 30, 2021 17:27
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexcpn/1f187f2114976e748f4d3ad38dea17e8 to your computer and use it in GitHub Desktop.
Save alexcpn/1f187f2114976e748f4d3ad38dea17e8 to your computer and use it in GitHub Desktop.
Creating a KD tree and algorithm for finding nearest neighbour
from collections import namedtuple
from operator import itemgetter
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import host_subplot
fig =plt.figure(figsize=(10, 8), dpi=80)
ax = plt.gca()
def parse_tree(node,prev_node=None,label='Root'):
# Helper utility to display the KD tree graphically to understand the spatial split
# recursively parses the KD tree
if node is None:
return
ax.plot(node.cell[0],node.cell[1],'ro')
ax.text(node.cell[0],node.cell[1],label + ' ' + str(node.cell[2]))
if prev_node:
ax.plot([node.cell[0],prev_node.cell[0]],[node.cell[1],prev_node.cell[1]])
parse_tree(node.left_branch,node,label='L')
parse_tree(node.right_branch,node,label='R')
def find_nearest_neighbour(node,point,distance_fn,current_axis):
# Algorith to find nearest neighbour in a KD Tree;the KD tree has done a spatial sort
# of the given co-ordinates, such that to the left of the root lies co-ordinates nearest to the x-axis
# and to the right of the root ,lies the co-ordinates farthest from the x axis
# On the y axis split on the left of the parent/root node lies co-ordinates nearest to the y-axis and to
# the right of the root, lies the co-ordinates farthest from the y axis
# to find the nearest neightbour, from the root, you first check left and right node; if distance is closer
# to the right node,then the entire left node can be discarded from search, because of the spatial split
# and that node becomes the root node. This process is continued recursively till the nearest is found
# param:node: The current node
# param: point: The point to which the nearest neighbour is to be found
# param: distance_fn: to calculate the nearest neighbour
# param: current_axis: here assuming only two dimenstion and current axis will be either x or y , 0 or 1
if node is None:
return None,None
current_closest_node = node
closest_known_distance = distance_fn(node.cell[0],node.cell[1],point[0],point[1])
print closest_known_distance,node.cell
x = (node.cell[0],node.cell[1])
y = point
new_node = None
new_closest_distance = None
if x[current_axis] > y[current_axis]:
new_node,new_closest_distance= find_nearest_neighbour(node.left_branch,point,distance_fn,
(current_axis+1) %2)
else:
new_node,new_closest_distance = find_nearest_neighbour(node.right_branch,point,distance_fn,
(current_axis+1) %2)
if new_closest_distance and new_closest_distance < closest_known_distance:
print 'Reset closest node to ',new_node.cell
closest_known_distance = new_closest_distance
current_closest_node = new_node
return current_closest_node,closest_known_distance
class Node(namedtuple('Node','cell, left_branch, right_branch')):
# This Class is taken from wikipedia code snippet for KD tree
pass
def create_kdtree(cell_list,current_axis,no_of_axis):
# Creates a KD Tree recursively following the snippet from wikipedia for KD tree
# but making it generic for any number of axis and changes in data strucure
if not cell_list:
return
# sget the cell as a tuple list
k= [(cell[0],cell[1],cell[2]) for cell in cell_list]
k.sort(key=itemgetter(current_axis)) # sort on the current axis
median = len(k) // 2 # get the median of the list
axis = (current_axis + 1) % no_of_axis # cycle the axis
return Node(k[median], # recurse
create_kdtree(k[:median],axis,no_of_axis),
create_kdtree(k[median+1:],axis,no_of_axis))
#point_list = [(13.04454, 77.621003,1), (13.047443, 77.61785,2), (13.048644, 77.619951,3),
# (13.049337, 77.620301,4), (13.051384, 77.618898,5)]
cell_list = create_random_points_around_loc(10, 13.049763315394207 , 77.619163323666285,.200)
print cell_list
tree = create_kdtree(cell_list,0,2)
parse_tree(tree)
plt.plot(13.04454, 77.621003,'bo')
plt.show()
node,distance = find_nearest_neighbour(tree,(13.04454, 77.621003),haversine,0)
print 'Nearest Neighbour=',node.cell,distance
from __future__ import division
import numpy as np
RADIUS_OF_EARTH_IN_KM = 6372.8
def haversine(lat1, lon1, lat2, lon2):
"""
Utility to calcutlate distance between two pointtodo explain regarding height
coverting from geodisc co-ordinate to cartestian gives errors when distances are further apart
"""
dLat = np.radians(lat2 - lat1)
dLon = np.radians(lon2 - lon1)
lat1 = np.radians(lat1)
lat2 = np.radians(lat2)
a = np.sin(dLat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dLon/2)**2
c = 2*np.arcsin(np.sqrt(a))
return RADIUS_OF_EARTH_IN_KM * c
def create_random_point(x0,y0,distance):
"""
Utility method for simulation of the points
"""
r = distance/ 111300
u = np.random.uniform(0,1)
v = np.random.uniform(0,1)
w = r * np.sqrt(u)
t = 2 * np.pi * v
x = w * np.cos(t)
x1 = x / np.cos(y0)
y = w * np.sin(t)
return (x0+x1, y0 +y)
def create_random_points_around_loc(max_elements,lat1,long1,distance_in_meter):
list_of_points= list(tuple())
for x in range(0,max_elements):
latitude2,longitude2 =create_random_point(lat1,long1,distance_in_meter*1000)
list_of_points.append((latitude2,longitude2,x))
return list_of_points
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment