Last active
February 14, 2017 02:52
-
-
Save kv-kunalvyas/8fd7003a86d297fc0ac22490716d71d8 to your computer and use it in GitHub Desktop.
Code Sample: Python function to calculate shortest and farthest points in a two dimensional cluster
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def closest_and_farthest(data): | |
""" | |
Finds closest and farthest apartments from a set of data | |
:param data: a pandas dataframe of the data | |
:returns: apartment ids and rounded off distances | |
""" | |
def calc_dist(lat_long_1, lat_long_2): | |
""" | |
Calculates distance(mi) between two latitude-longitude pairs | |
Reference: https://en.wikipedia.org/wiki/Great-circle_distance | |
:param lat_long_1: tuple containing (row_id, latitude, longitude) | |
:param lat_long_2: tuple containing (row_id, latitude, longitude) | |
:returns: distance between two coordinates in miles, ids of two | |
coordinates as set of tuple | |
# Check for zero(same coordinates) | |
""" | |
earth_radius = 3959.0 # miles | |
# check if points have the same coordinates | |
if lat_long_1[1] == lat_long_2[1] and lat_long_1[2] == lat_long_2[2]: | |
return None | |
try: | |
phi_1 = radians(float(lat_long_1[1])) | |
lambda_1 = radians(float(lat_long_1[2])) | |
phi_2 = radians(float(lat_long_2[1])) | |
lambda_2 = radians(float(lat_long_2[2])) | |
except TypeError: | |
# If the coordinates are not float, points are discarded | |
print 'Skipped calculating distance between two points because at\ | |
least one of them is not a number' | |
return None | |
delta_lambda = abs(lambda_2-lambda_1) | |
central_angle = acos(sin(phi_1) * sin(phi_2) + | |
cos(phi_1) * cos(phi_2) * cos(delta_lambda) | |
) | |
distance = earth_radius * central_angle | |
point_ids = [lat_long_1[0], lat_long_2[0]] | |
# sorting the ids so that its easy to compare them going ahead | |
point_ids.sort() | |
# returning the [distance, set([tuple(point_ids)])] datatype which will | |
# be used to perform distance comparisons ahead | |
return [distance, set([tuple(point_ids)])] | |
def brute_force_dist(coordinates, mode='min'): | |
""" | |
Calculates shortest/largest distance between set of points by doing a | |
comprehensive check - checking all values with others | |
""" | |
if mode=='min': | |
# setting distance as infinity as starting value | |
dist = [float('inf'), set()] | |
for i in range(len(coordinates)): | |
for j in range(i+1, len(coordinates)): | |
# checking if coordinates(i,j) are equal and ignoring them | |
# by skipping calculating distance between them. | |
# distance_ij is the distance in miles between i-th and j-th | |
# points in iteration of coordinates | |
distance_ij = calc_dist(coordinates[i], coordinates[j]) | |
if distance_ij: | |
# choose min of current distance and distance between | |
# i and j coordinates | |
dist = compare_dist(dist, distance_ij, mode='min') | |
elif mode=='max': | |
# setting distance as negative infinity as starting value | |
dist = [float('-inf'), set()] | |
for i in range(len(coordinates)): | |
for j in range(i+1, len(coordinates)): | |
# making sure coordinates i and j are different | |
distance_ij = calc_dist(coordinates[i], coordinates[j]) | |
if distance_ij: | |
# assign max of current distance and distance between | |
# i and j coordinates and update current max | |
dist = compare_dist(dist, distance_ij, mode='max') | |
else: | |
raise ValueError("mode parameter to brute_force_dist must contain\ | |
one value from 'min', 'max' or nothing(default=min).") | |
# return the resulting distance | |
return dist | |
def compare_dist(dist_1, dist_2, mode='min'): | |
""" | |
Compares distances between two points and returns min/max depending | |
on the mode parameter. If points have same distance and different pair | |
of points, then the points are appended to the set of tuples and result | |
is returned. | |
:param dist_1: list returned by calc_dist and of the format - | |
[distance, set([tuple(point_ids)])] where point_ids are sorted in | |
ascending order | |
:param dist_2: distance and points of the same format as dist_1 | |
:param mode: optional parameter, if not given, the minimum distance is | |
compared and if it is 'max', then maximum of the two distances is | |
returned | |
""" | |
# checking for pair of points with equal distances | |
if dist_1[0] == dist_2[0]: | |
# add the point ids to the set if the pair are different | |
# (the points are sorted when they are added to the tuple) | |
dist_1[1] = dist_1[1] | dist_2[1] | |
return dist_1 | |
# if distances between two points are different | |
if mode=='min': | |
# return minimum of two distances | |
if dist_1[0] > dist_2[0]: | |
return dist_2 | |
return dist_1 | |
elif mode=='max': | |
# return maximum of two distances | |
if dist_1[0] > dist_2[0]: | |
return dist_1 | |
return dist_2 | |
def closest_apts(coordinates, min_dist=[float('inf'), set()]): | |
""" | |
Finds the closest pair of points. | |
:param coordinates: this is a first param | |
:param first_run: this is a second param | |
:returns: the ids and distance in miles | |
:raises keyError: raises an exception if x | |
""" | |
# base case: division of points not required if there are 2 or 3 points | |
if len(coordinates) in (2,3): | |
return compare_dist(min_dist, brute_force_dist(coordinates)) | |
# divide points into two parts and choose a line to divide them | |
left_points = coordinates[:len(coordinates)/2] | |
right_points = coordinates[len(coordinates)/2:] | |
center = right_points[0] | |
# Recurse on the set of left and right points and update min_dist | |
min_dist = compare_dist(min_dist, closest_apts(left_points)) | |
min_dist = compare_dist(min_dist, closest_apts(right_points)) | |
# Check points across central boundary | |
delta = min_dist | |
# if there are points near the boundary in the range of the delta value, | |
# then search for them and compare their dist with min_value | |
# 69 is the approximate value of a degree of latitude in miles | |
center_points = [x for x in coordinates \ | |
if delta[0] >= abs(x[1]-center[1]) * 69.0] | |
min_dist = compare_dist(min_dist, brute_force_dist(center_points)) | |
return min_dist | |
def farthest_apts(coordinates): | |
""" | |
The function calculates the two farthest apartments in the system | |
:param coordinates: The id, lat-long pair of data | |
:returns: list containing tuple(s) of pair(s) of points and distance | |
between them | |
""" | |
def direction(p1, p2, p3): | |
""" | |
Computes if three points are making a clockwise or counter-clockwise | |
turn, or going straight | |
:param p1: point 1 | |
:param p2: point 2 | |
:param p3: point 3 | |
:returns: value less than 0 if counter-clockwise, greater than 0 if | |
clockwise, and 0 if neither | |
""" | |
return (p2[1]-p1[1])*(p3[2]-p1[2])-(p2[2]-p1[2])*(p3[1]-p1[1]) | |
def sort_by_angle(head): | |
""" | |
Helps to sort points by polar angle | |
""" | |
def condition(p1, p2): | |
# check if points are going clockwise or counter-clockwise | |
compare = direction(head, p1, p2) | |
# if direction is less than zero, the points are turning in | |
# counter-clockwise direction | |
if compare < 0: return 1 | |
# if direction is greater than zero, the points are turning in | |
# clockwise direction | |
elif compare > 0: return -1 | |
# else not changing direction | |
else: return 0 | |
return condition | |
def graham_scan(coordinates): | |
""" | |
The Graham Scan algorithm sorts the points by their polar angles and | |
creates a boundary around the point system | |
""" | |
if len(coordinates) < 3: | |
return coordinates | |
# find the leftmost point in the system and remove it from the list | |
head = coordinates.pop(0) | |
# sort the remaining points in counterclockwise manner | |
coordinates = sorted(coordinates, cmp=sort_by_angle(head)) | |
# the stack will contain the points in the convex hull | |
stack = [] | |
stack.append(head) | |
stack.append(coordinates[0]) | |
stack.append(coordinates[1]) | |
# iterate through the points and keep updating the stack based on | |
# clockwise and counter-clockwise turns made | |
for point in range(2, len(coordinates)): | |
while len(stack) > 3 and \ | |
direction(stack[-2], stack[-1], coordinates[point]) <= 0: | |
del stack[-1] | |
stack.append(coordinates[point]) | |
return stack | |
# compare the points on the convex hull and return the largest distance | |
return brute_force_dist(graham_scan(coordinates), mode='max') | |
# calling required functions | |
# setting the coordinates as a list of id, lat and long values | |
coordinates = zip(data.index, data['Latitude'], data['Longitude']) | |
# Sorts the points based on latitude values | |
coordinates = sorted(coordinates, key=lambda x: x[1]) | |
# print apartment ids and distance between them | |
closest = closest_apts(coordinates) | |
farthest = farthest_apts(coordinates) | |
print '-------------------------------' | |
print 'closest apartments =', np.around(closest[0], 2), '\n'.join(str(p) for p in closest[1]) | |
print 'farthest apartments =', np.around(farthest[0], 2), '\n'.join(str(p) for p in farthest[1]) | |
print '-------------------------------' | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment