Skip to content

Instantly share code, notes, and snippets.

@dal
Created December 31, 2012 01:38
Show Gist options
  • Save dal/4416699 to your computer and use it in GitHub Desktop.
Save dal/4416699 to your computer and use it in GitHub Desktop.
Simple python script to remove unwanted discontinuities in gpx track files.
#!/usr/bin/python
'''
Split gpx tracks at discontinuities, that is, pairs of points in a track that
are further than a given distance threshold.
'''
import collections
from lxml import etree
import math
import optparse
import re
import sys
# great circle distance threshold in miles
dist_threshold = 0.15
nauticalMilePerLat = 60.00721
nauticalMilePerLongitude = 60.10793
milesPerNauticalMile = 1.15078
Point = collections.namedtuple('Point', 'lat lon')
def getDist(pt1, pt2):
'''
Return squared distance between two points.
@param pt1: The first Point instance.
@param pt2: The second Point instance.
@return Squared distance in mercator degrees.
'''
lat1 = conformLat(pt1.lat)
lat2 = conformLat(pt2.lat)
return math.pow((lat1 - lat2), 2) + \
math.pow((math.radians(pt1.lon) - math.radians(pt2.lon)), 2)
def conformLat(lat):
'''
Conform the supplied latitude to Mercator projection.
@param lat: Latitude in degrees.
@return: Conformed latitude in degrees.
'''
rlat = math.radians(lat)
return math.log(math.tan(rlat) + (1/math.cos(rlat)))
def gcDist(pt1, pt2):
'''
Caclulate great circle distance between two lat lons in miles.
@param pt1: The first Point instance.
@param pt2: The second Point instance.
@return: Great circle distance between the two points in miles.
'''
yDistance = (pt2.lat - pt1.lat) * nauticalMilePerLat
xDistance = (math.cos(math.radians(pt1.lat)) + math.cos(math.radians(pt2.lat))) \
* (pt2.lon - pt1.lon) * (nauticalMilePerLongitude / 2)
distance = math.sqrt( yDistance**2 + xDistance**2 )
return distance * milesPerNauticalMile
def parseGpx(path):
'''
Parse the supplied gpx file and return list of lists of Point instances
for all tracks in the file.
@param path: Path to a gpx file.
@return: tuple of list of lists of point instances and the lxml.etree
node instance representing the namespaces of the gpx file.
'''
lines = list()
total_dist = 0.0
max_dist = 0.0
i = 0
segCount = 0
nsmap = None
with open(path) as fd:
tree = etree.parse(fd)
gpx = tree.getroot()
nsmap = gpx.nsmap
namespace = nsmap.get(None)
assert(namespace)
namespace = '{{{0}}}'.format(namespace)
with open(path) as fd:
for _, trk in etree.iterparse(fd, tag='{0}trk'.format(namespace)):
tracks = trk.findall('%strkseg' % namespace)
for trkseg in tracks:
i += 1
lastpt = None
sys.stderr.write("trkseg %d\n" % i)
myTrack = list()
trkpts = trkseg.findall('{0}trkpt'.format(namespace))
for trkpt in trkpts:
lat = trkpt.get('lat')
lon = trkpt.get('lon')
pt = Point(float(lat), float(lon))
if lastpt:
segCount += 1
dist = gcDist(lastpt, pt)
total_dist += dist
max_dist = max(dist, max_dist)
if dist > dist_threshold:
sys.stderr.write("Splitting track\n")
# Split the track
if len(myTrack) > 1:
lines.append(myTrack)
myTrack = list()
elif lines and lines[-1]:
lastPt = lines[-1][-1]
dist = gcDist(lastPt, pt)
if dist < dist_threshold:
myTrack.append(lastPt)
myTrack.append(pt)
lastpt = pt
lines.append(myTrack)
if segCount:
sys.stderr.write('Mean distance: %smi max: %smi\n' % (total_dist/segCount,
max_dist))
else:
sys.stderr.write("No segments found\n")
return lines, nsmap
def dumpTracksToGpx(tracks, nsmap, fd):
'''
Write the supplied list of lists of Point instances representing tracks to
the supplied file object.
@param tracks: list of lists of Point instances representing tracks.
@param nsmap: namespace
@param fd: File descriptor to which to write the tracks in gpx format.
'''
ns = '{{{0}}}'.format(nsmap.get(None))
root = etree.Element(ns + "gpx", nsmap=nsmap)
for track in tracks:
myTrk = etree.Element(ns+"trk")
myTrkSeg = etree.Element(ns+"trkseg")
myTrk.append(myTrkSeg)
for pt in track:
myTrkSeg.append(etree.Element(ns+"trkpt", lat=str(pt.lat), lon=str(pt.lon)))
root.append(myTrk)
fd.write(etree.tostring(root, pretty_print=True))
fd.flush()
def main():
'''
Take the supplied gpx files. For each track in the file, split tracks at
any two points that are further apart than the distance threshold (default
0.15 miles).
Prints all the new tracks to stdout in gpx format.
'''
global dist_threshold
usage = "usage: %prog [-dist <num>] path1 path2 . . ."
parser = optparse.OptionParser(usage=usage, description=__doc__)
parser.add_option('--dist',
'-d',
type='float',
action='store',
help="Set the maximum distance threshold in miles")
opts, args = parser.parse_args(sys.argv[1:])
if opts.dist:
dist_threshold = opts.dist
tracks = list()
nsmap = None
for path in args:
mytracks, mynsmap = parseGpx(path)
nsmap = mynsmap or nsmap
tracks.extend(mytracks)
if tracks and nsmap:
dumpTracksToGpx(tracks, nsmap, sys.stdout)
if __name__ == '__main__':
sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment