Skip to content

Instantly share code, notes, and snippets.

@kv-kunalvyas
Last active February 14, 2017 02:52
Show Gist options
  • Save kv-kunalvyas/8fd7003a86d297fc0ac22490716d71d8 to your computer and use it in GitHub Desktop.
Save kv-kunalvyas/8fd7003a86d297fc0ac22490716d71d8 to your computer and use it in GitHub Desktop.
Code Sample: Python function to calculate shortest and farthest points in a two dimensional cluster
def closest_and_farthest(data):
"""
Finds closest and farthest apartments from a set of data
:param data: a pandas dataframe of the data
:returns: apartment ids and rounded off distances
"""
def calc_dist(lat_long_1, lat_long_2):
"""
Calculates distance(mi) between two latitude-longitude pairs
Reference: https://en.wikipedia.org/wiki/Great-circle_distance
:param lat_long_1: tuple containing (row_id, latitude, longitude)
:param lat_long_2: tuple containing (row_id, latitude, longitude)
:returns: distance between two coordinates in miles, ids of two
coordinates as set of tuple
# Check for zero(same coordinates)
"""
earth_radius = 3959.0 # miles
# check if points have the same coordinates
if lat_long_1[1] == lat_long_2[1] and lat_long_1[2] == lat_long_2[2]:
return None
try:
phi_1 = radians(float(lat_long_1[1]))
lambda_1 = radians(float(lat_long_1[2]))
phi_2 = radians(float(lat_long_2[1]))
lambda_2 = radians(float(lat_long_2[2]))
except TypeError:
# If the coordinates are not float, points are discarded
print 'Skipped calculating distance between two points because at\
least one of them is not a number'
return None
delta_lambda = abs(lambda_2-lambda_1)
central_angle = acos(sin(phi_1) * sin(phi_2) +
cos(phi_1) * cos(phi_2) * cos(delta_lambda)
)
distance = earth_radius * central_angle
point_ids = [lat_long_1[0], lat_long_2[0]]
# sorting the ids so that its easy to compare them going ahead
point_ids.sort()
# returning the [distance, set([tuple(point_ids)])] datatype which will
# be used to perform distance comparisons ahead
return [distance, set([tuple(point_ids)])]
def brute_force_dist(coordinates, mode='min'):
"""
Calculates shortest/largest distance between set of points by doing a
comprehensive check - checking all values with others
"""
if mode=='min':
# setting distance as infinity as starting value
dist = [float('inf'), set()]
for i in range(len(coordinates)):
for j in range(i+1, len(coordinates)):
# checking if coordinates(i,j) are equal and ignoring them
# by skipping calculating distance between them.
# distance_ij is the distance in miles between i-th and j-th
# points in iteration of coordinates
distance_ij = calc_dist(coordinates[i], coordinates[j])
if distance_ij:
# choose min of current distance and distance between
# i and j coordinates
dist = compare_dist(dist, distance_ij, mode='min')
elif mode=='max':
# setting distance as negative infinity as starting value
dist = [float('-inf'), set()]
for i in range(len(coordinates)):
for j in range(i+1, len(coordinates)):
# making sure coordinates i and j are different
distance_ij = calc_dist(coordinates[i], coordinates[j])
if distance_ij:
# assign max of current distance and distance between
# i and j coordinates and update current max
dist = compare_dist(dist, distance_ij, mode='max')
else:
raise ValueError("mode parameter to brute_force_dist must contain\
one value from 'min', 'max' or nothing(default=min).")
# return the resulting distance
return dist
def compare_dist(dist_1, dist_2, mode='min'):
"""
Compares distances between two points and returns min/max depending
on the mode parameter. If points have same distance and different pair
of points, then the points are appended to the set of tuples and result
is returned.
:param dist_1: list returned by calc_dist and of the format -
[distance, set([tuple(point_ids)])] where point_ids are sorted in
ascending order
:param dist_2: distance and points of the same format as dist_1
:param mode: optional parameter, if not given, the minimum distance is
compared and if it is 'max', then maximum of the two distances is
returned
"""
# checking for pair of points with equal distances
if dist_1[0] == dist_2[0]:
# add the point ids to the set if the pair are different
# (the points are sorted when they are added to the tuple)
dist_1[1] = dist_1[1] | dist_2[1]
return dist_1
# if distances between two points are different
if mode=='min':
# return minimum of two distances
if dist_1[0] > dist_2[0]:
return dist_2
return dist_1
elif mode=='max':
# return maximum of two distances
if dist_1[0] > dist_2[0]:
return dist_1
return dist_2
def closest_apts(coordinates, min_dist=[float('inf'), set()]):
"""
Finds the closest pair of points.
:param coordinates: this is a first param
:param first_run: this is a second param
:returns: the ids and distance in miles
:raises keyError: raises an exception if x
"""
# base case: division of points not required if there are 2 or 3 points
if len(coordinates) in (2,3):
return compare_dist(min_dist, brute_force_dist(coordinates))
# divide points into two parts and choose a line to divide them
left_points = coordinates[:len(coordinates)/2]
right_points = coordinates[len(coordinates)/2:]
center = right_points[0]
# Recurse on the set of left and right points and update min_dist
min_dist = compare_dist(min_dist, closest_apts(left_points))
min_dist = compare_dist(min_dist, closest_apts(right_points))
# Check points across central boundary
delta = min_dist
# if there are points near the boundary in the range of the delta value,
# then search for them and compare their dist with min_value
# 69 is the approximate value of a degree of latitude in miles
center_points = [x for x in coordinates \
if delta[0] >= abs(x[1]-center[1]) * 69.0]
min_dist = compare_dist(min_dist, brute_force_dist(center_points))
return min_dist
def farthest_apts(coordinates):
"""
The function calculates the two farthest apartments in the system
:param coordinates: The id, lat-long pair of data
:returns: list containing tuple(s) of pair(s) of points and distance
between them
"""
def direction(p1, p2, p3):
"""
Computes if three points are making a clockwise or counter-clockwise
turn, or going straight
:param p1: point 1
:param p2: point 2
:param p3: point 3
:returns: value less than 0 if counter-clockwise, greater than 0 if
clockwise, and 0 if neither
"""
return (p2[1]-p1[1])*(p3[2]-p1[2])-(p2[2]-p1[2])*(p3[1]-p1[1])
def sort_by_angle(head):
"""
Helps to sort points by polar angle
"""
def condition(p1, p2):
# check if points are going clockwise or counter-clockwise
compare = direction(head, p1, p2)
# if direction is less than zero, the points are turning in
# counter-clockwise direction
if compare < 0: return 1
# if direction is greater than zero, the points are turning in
# clockwise direction
elif compare > 0: return -1
# else not changing direction
else: return 0
return condition
def graham_scan(coordinates):
"""
The Graham Scan algorithm sorts the points by their polar angles and
creates a boundary around the point system
"""
if len(coordinates) < 3:
return coordinates
# find the leftmost point in the system and remove it from the list
head = coordinates.pop(0)
# sort the remaining points in counterclockwise manner
coordinates = sorted(coordinates, cmp=sort_by_angle(head))
# the stack will contain the points in the convex hull
stack = []
stack.append(head)
stack.append(coordinates[0])
stack.append(coordinates[1])
# iterate through the points and keep updating the stack based on
# clockwise and counter-clockwise turns made
for point in range(2, len(coordinates)):
while len(stack) > 3 and \
direction(stack[-2], stack[-1], coordinates[point]) <= 0:
del stack[-1]
stack.append(coordinates[point])
return stack
# compare the points on the convex hull and return the largest distance
return brute_force_dist(graham_scan(coordinates), mode='max')
# calling required functions
# setting the coordinates as a list of id, lat and long values
coordinates = zip(data.index, data['Latitude'], data['Longitude'])
# Sorts the points based on latitude values
coordinates = sorted(coordinates, key=lambda x: x[1])
# print apartment ids and distance between them
closest = closest_apts(coordinates)
farthest = farthest_apts(coordinates)
print '-------------------------------'
print 'closest apartments =', np.around(closest[0], 2), '\n'.join(str(p) for p in closest[1])
print 'farthest apartments =', np.around(farthest[0], 2), '\n'.join(str(p) for p in farthest[1])
print '-------------------------------'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment