Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extracts duration from gpx and tcx files for further processing
#!/usr/local/bin/python
# encoding: utf-8
"""
activity_extractor.py
Created by Christian Stade-Schuldt on 2013-10-29.
"""
import os
from lxml import etree as et
from datetime import timedelta, datetime
# Root directory that main() walks recursively looking for track files.
# extract_period() expects files below it to be laid out as
# activity_type/year/file.{gpx,tcx}.
PATH = "/Users/stade/Documents/Dropbox/Eigene Dateien/GPS-Tracks/Processed"
# XML namespaces of the two supported track formats.
_GPX_NS = "{http://www.topografix.com/GPX/1/1}"
_TCX_NS = "{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}"

# Timestamp layouts tried in order: with and without fractional seconds.
# Either format may show up in either file type, so both are always tried.
_TIMESTAMP_FORMATS = ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ")


def _parse_minute(text):
    """Parse a 'Z'-suffixed timestamp string, truncated to the whole minute."""
    stamp = (text or "").strip()
    for fmt in _TIMESTAMP_FORMATS:
        try:
            parsed = datetime.strptime(stamp, fmt)
        except ValueError:
            continue
        return parsed.replace(second=0, microsecond=0)
    raise ValueError("unrecognised timestamp: %r" % (text,))


def _find_longitude(root):
    """Return the longitude of the first track point as a float, or None."""
    gpx_point = root.find(".//%strkpt" % _GPX_NS)
    if gpx_point is not None and gpx_point.attrib.get("lon") is not None:
        return float(gpx_point.attrib.get("lon"))
    tcx_lon = root.find(".//%sLongitudeDegrees" % _TCX_NS)
    if tcx_lon is not None and tcx_lon.text:
        return float(tcx_lon.text)
    return None


def extract_period(activity_file):
    """Return one (datetime, activity_type) tuple per minute of an activity.

    The activity type is the name of the directory two levels above the
    file, because files are organized activity_type/year/file.{gpx,tcx}.
    The first and last time elements of the file delimit the activity;
    both are truncated to the minute and shifted by a rough local-time
    offset derived from the first track point's longitude (one hour per
    15 degrees), plus one hour for start months April through October.

    Args:
        activity_file: path to a .gpx or .tcx file.

    Returns:
        A list of (datetime, activity_type) tuples, one per minute from
        the shifted start (inclusive) to the shifted end (exclusive).
        Empty if the file contains no time elements or the activity does
        not span a minute boundary.

    Raises:
        ValueError: if the extension is neither .gpx nor .tcx, or a
            timestamp cannot be parsed.
    """
    # os.path instead of split("/") so paths built with os.path.join work
    # regardless of the platform's separator.
    activity_type = os.path.basename(
        os.path.dirname(os.path.dirname(activity_file)))

    extension = os.path.splitext(activity_file)[1].lower()
    if extension == ".gpx":
        time_tag = _GPX_NS + "time"
    elif extension == ".tcx":
        time_tag = _TCX_NS + "Time"
    else:
        raise ValueError("unsupported activity file: %r" % (activity_file,))

    root = et.parse(activity_file).getroot()
    times = root.findall(".//" + time_tag)
    if not times:
        # Nothing to report for a file without any timestamps.
        return []
    start = _parse_minute(times[0].text)
    end = _parse_minute(times[-1].text)

    # Get longitude for the time offset.
    lon = _find_longitude(root)
    if lon is not None:
        offset = int(round(lon * 24 / 360))
    else:
        offset = 1  # assume Central European Time if no longitude is present
    # Apply daylight saving time, roughly (April through October).
    if 4 <= start.month <= 10:
        offset += 1

    shift = timedelta(hours=offset)
    start += shift
    end += shift

    # Both ends are whole minutes, so the difference is a whole number of
    # minutes; a non-positive difference yields an empty list.
    minutes = int((end - start).total_seconds() // 60)
    return [(start + timedelta(minutes=i), activity_type)
            for i in range(minutes)]
def main(path=None, output_file="output.csv"):
    """Collect the per-minute activity records of all track files into a CSV.

    Walks the directory tree recursively, passes every .gpx and .tcx file
    to extract_period(), and writes one "timestamp, activity" line per
    returned tuple below a "Timestamp, Activity" header line.

    Args:
        path: root directory to scan; defaults to the module-level PATH.
        output_file: file to write, overwritten if it exists.
    """
    if path is None:
        path = PATH

    total_times = []
    # os.walk yields (dirpath, dirnames, filenames) triples.
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            # Match the two supported extensions explicitly rather than any
            # name that merely ends in "x".
            if file_name.endswith((".gpx", ".tcx")):
                total_times.extend(
                    extract_period(os.path.join(root, file_name)))

    # The with-statement closes the file even if a write fails.
    with open(output_file, "w") as out:
        out.write("Timestamp, Activity\n")
        for moment, activity in total_times:
            out.write("%s, %s\n"
                      % (moment.strftime("%Y-%m-%d %H:%M:%S"), activity))
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.