Skip to content

Instantly share code, notes, and snippets.

@jimr
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimr/ec8b8cc6c9e04fca84fd to your computer and use it in GitHub Desktop.
Save jimr/ec8b8cc6c9e04fca84fd to your computer and use it in GitHub Desktop.
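Helper functions for cleaning up user-entered geographic coordinates. Call get_coords(lat, lon) with the raw latitude and longitude values (in that order) to get cleaned decimal-degree floats, or None for placeholders such as 'TBC' or 'N/A'. No guarantees; check your results.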
def clean_point(point):
    """Normalise one raw, user-entered coordinate string.

    Strips spreadsheet/copy-paste junk, turns a southern or western
    hemisphere letter (``S``/``W``) into a leading minus sign, and rewrites
    degree/minute/second marks so that the result is either a plain decimal
    number (``'-1.23'``) or a dot-separated ``D.M.S`` string
    (``'50.30.15.5'``) of the form ``dms_to_dec`` splits.

    :param point: coordinate text, e.g. ``'1.23S'`` or ``'50:30:15'``.
    :returns: the cleaned string. It is not validated, so converting it to a
        number can still fail for input this function does not recognise.
    """
    # Imported locally so the function works even when the module has no
    # top-level ``import re``.
    import re

    # junk
    point = point.strip()
    # BOM, space, no-break space, en dash.
    # NOTE(review): an en dash typed as a minus sign is dropped here rather
    # than converted to '-' -- confirm that is intended.
    for symbol in (u'\ufeff', ' ', u'\xa0', u'\u2013'):
        point = point.replace(symbol, '')
    point = point.lstrip('+')
    point = point.lstrip("'")  # leading quote chars to fool excel
    point = point.strip(',')
    point = point.rstrip('.')
    # N and E never change the sign, so the letters are simply discarded.
    point = point.strip('N')
    point = point.strip('E')

    # e.g 1.23°S -> -1.23°
    for pattern in (u'[SW](.*)', u'(.*)[SW]'):
        match = re.match(pattern, point)
        if match:
            coord = match.group(1)
            # Always drop the hemisphere letter; only add a minus sign when
            # the value is not already negative ('-1.23S' -> '-1.23').
            point = coord if coord.startswith('-') else u'-%s' % coord

    degrees = (u'\xb0', u'\xba')  # [°, º]
    minutes = (u'\u2019', u'\u2032', u'\xb4', u"'", u':')  # [’, ′, ´, ', :]

    # Whole degrees followed by (decimal) minutes and a closing minute mark,
    # e.g. 65°42.5'. Simply swapping the marks for dots would give '65.42.5',
    # where the minute fraction is indistinguishable from a seconds field
    # (and 65°42' would collapse to the decimal number '65.42'). Emit an
    # explicit seconds field instead: fraction-of-a-minute * 60.
    deg_min = re.match(
        r'(-?\d+)[%s](\d+)(\.\d+)?[%s]$'
        % (u''.join(degrees), u''.join(minutes)),
        point)
    if deg_min:
        whole_degrees, whole_minutes, fraction = deg_min.groups()
        seconds = float(fraction) * 60 if fraction else 0.0
        return u'%s.%s.%r' % (whole_degrees, whole_minutes, seconds)

    # degrees
    for symbol in degrees:
        # e.g. 50.12345°
        match = re.match(r'(-?\d+(\.\d+)?)%s$' % symbol, point)
        if match:
            # A bare number with a trailing degree symbol is already decimal,
            # so the symbol can just be dropped.
            point = match.group(1)
    for symbol in degrees:
        point = point.replace(symbol, '.')

    # minutes
    for symbol in minutes:
        # A value that is already dot-separated and merely ends in a minute
        # mark only needs the mark removed; replacing it with a dot would
        # leave a dangling separator.
        match = re.match(r'(-?\d+\.\d+(\.\d+)?)%s$' % symbol, point)
        if match:
            point = match.group(1)
            break
    for symbol in minutes:
        point = point.replace(symbol, '.')

    # seconds
    for symbol in (u'\xa8', u'\u2033', u'\u201d', '"'):  # [¨, ″, ”, "]
        point = point.rstrip(symbol)

    # Marks that were doubled up (e.g. '' used as a seconds mark) have been
    # turned into trailing dots by now; they carry no value.
    return point.rstrip('.')
def dms_to_dec(point):
    """Convert a dot-separated ``D.M.S`` string to decimal degrees.

    The string is split on the first two dots only, so the seconds field may
    itself contain a decimal point (``'50.30.15.5'`` is 50 degrees,
    30 minutes, 15.5 seconds). Missing trailing fields count as zero.

    A leading minus sign applies to the whole value, not just the degrees
    field: ``'-1.30.0'`` is -1.5, and ``'-0.30.0'`` is -0.5.

    :param point: text such as ``'50.30.15.5'``, ``'-1.30'`` or ``'12'``.
    :returns: the value in decimal degrees, as a float.
    :raises ValueError: if any field is not a valid number.
    """
    text = point.strip()
    # Peel the sign off first; otherwise minutes and seconds would be added
    # as positive amounts to a negative degrees value (and '-0' would lose
    # its sign altogether).
    negative = text.startswith('-')
    if negative:
        text = text[1:]

    # degrees, minutes, seconds -> fractions of a degree
    divisors = (1.0, 60.0, 3600.0)
    fields = text.split('.', 2)
    total = sum(float(value) / divisor
                for value, divisor in zip(fields, divisors))
    return -total if negative else total
def get_coords(lat, lon):
    """Clean a raw latitude/longitude pair into decimal-degree floats.

    Each value is handled independently:

    * ``None`` and the placeholder strings ``'TBC'``, ``'N/A'``, ``'??'``
      and ``''`` (after stripping whitespace) give ``None``;
    * ``int``/``float`` values are returned as floats unchanged;
    * any other string is passed through ``clean_point`` and then parsed as
      a plain decimal number, falling back to ``dms_to_dec`` when that
      fails.

    :param lat: raw latitude value.
    :param lon: raw longitude value.
    :returns: a ``(lat, lon)`` tuple whose items are floats or ``None``.
    :raises ValueError: if a cleaned string is neither a decimal number nor
        something ``dms_to_dec`` can parse.
    """
    placeholders = ('TBC', 'N/A', '??', '')

    def _get_coord(point):
        if point is None:
            return None
        if isinstance(point, (int, float)):
            # Already numeric (e.g. read from a typed spreadsheet cell).
            return float(point)

        point = point.strip()
        if point in placeholders:
            return None

        point = clean_point(point)
        try:
            return float(point)
        except ValueError:
            # Not a plain decimal number; treat it as D.M.S.
            return dms_to_dec(point)

    return _get_coord(lat), _get_coord(lon)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment