Skip to content

Instantly share code, notes, and snippets.

@stampyzfanz
Last active January 22, 2022 05:39
Show Gist options
  • Save stampyzfanz/659e73fd8bb208fe3d597a5dfec8a587 to your computer and use it in GitHub Desktop.
Save stampyzfanz/659e73fd8bb208fe3d597a5dfec8a587 to your computer and use it in GitHub Desktop.
Combine tcx files into one.
# 1 Create a file tcxcombiner.py in the directory that contains your .tcx files, with these contents (or download it)
# 2 Execute it with the command 'py tcxcombiner.py' in order to combine the files into one, and to optionally minify it
# The minification works by removing a random share of the location data points (Trackpoints) and removing 94% of the DistanceMeters, Time, and AltitudeMeters tags.
# If any of them are desired, edit the following Trues to False
# Each flag marks one kind of tag as a candidate for removal while the
# activities are being combined: <DistanceMeters>, <Time> and <AltitudeMeters>
# respectively. Set a flag to False to leave that kind of tag untouched.
remove_distance = True
remove_time = True
remove_altitude = True
# Size budget in bytes. A number is converted into the share of trackpoints to
# drop, computed as 1 - size_constraint / (size of the combined data).
# The maximum for https://www.gpsvisualizer.com/ is 10 MB; 9 MB stays below it.
size_constraint = 9000000
# Alternatively give a percentage string. It is used directly as the share of
# trackpoints to drop (so "60%" drops roughly 60% of them):
# size_constraint = "60%"
import os
import random
import re
import sys
from xml.sax.saxutils import escape
# --- Phase 1: obtain the combined document -----------------------------------
# If everything.tcx already exists it is reused as a cache. Otherwise every
# other .tcx file in the current directory is merged, oldest first, into one
# document that is stored in ``xml`` and written to everything.tcx.
# NOTE(review): files are opened with the platform default encoding, exactly
# as before; TCX is normally UTF-8, so confirm this on non-UTF-8 platforms.

# Number of tags at each end of an activity that are never thinned out.
_EDGE_TAGS_KEPT = 20
# Chance for a tag in the middle of an activity to survive the thinning.
_MIDDLE_KEEP_CHANCE = 0.04


def _start_date(path):
    """Return the date of the first ``StartTime`` attribute in *path*.

    Only the first 10000 characters are inspected. The value is the text
    between ``StartTime="`` and the following ``T`` (or closing quote), or
    ``None`` when no such attribute is found.
    """
    with open(path, 'r') as f:
        head = f.read(10000)
    found = re.search(r"StartTime=\"(.*?)(?:T|\")", head)
    return found.group(1) if found else None


def _removal_pattern():
    """Compile a regex for the tags selected by the ``remove_*`` flags.

    Returns ``None`` when every flag is off. An empty pattern would
    otherwise match at every character position of the activity.
    """
    alternatives = []
    if remove_distance:
        alternatives.append('<DistanceMeters>.*?</DistanceMeters>')
    if remove_time:
        alternatives.append('<Time>.*?</Time>')
    if remove_altitude:
        alternatives.append('<AltitudeMeters.*?</AltitudeMeters>')
    if not alternatives:
        return None
    return re.compile('|'.join(alternatives))


def _thin_tags(activity, pattern):
    """Return *activity* with most matches of *pattern* cut out.

    Matches close to either end of the activity are always kept; each match
    in between survives with probability ``_MIDDLE_KEEP_CHANCE``. The result
    is assembled from slices in a single pass instead of re-slicing the whole
    string once per removed tag.
    """
    matches = list(pattern.finditer(activity))
    total = len(matches)
    kept = []
    cursor = 0
    for index, match in enumerate(matches):
        # Positions are counted from the end, as the original loop did.
        from_end = total - 1 - index
        in_middle = _EDGE_TAGS_KEPT < from_end < total - _EDGE_TAGS_KEPT
        # The original test skipped the *middle* tags and thinned only the
        # edges, so next to nothing was removed. Keeping the edges and
        # thinning the middle is what the header comment describes.
        if not in_middle or random.random() < _MIDDLE_KEEP_CHANCE:
            continue
        kept.append(activity[cursor:match.start()])
        cursor = match.end()
    kept.append(activity[cursor:])
    return ''.join(kept)


xml = ''
if os.path.isfile('everything.tcx'):
    with open('everything.tcx', 'r') as f:
        xml = f.read()
else:
    # Collect (filename, start date) pairs for every input file.
    dated_files = []
    for filename in os.listdir():
        if not filename.endswith('.tcx') or filename in ('everything.tcx', 'minified.tcx'):
            continue
        start_date = _start_date(filename)
        if start_date is None:
            print(f'Skipping {filename}: no StartTime attribute found', file=sys.stderr)
            continue
        dated_files.append((filename, start_date))
    # Dates are ISO formatted, so a plain string sort is chronological.
    dated_files.sort(key=lambda item: item[1])

    tag_pattern = _removal_pattern()
    pieces = []
    for filename, start_date in dated_files:
        with open(filename, 'r') as f:
            data = f.read()
        activities = re.search('<Activity.*</Activity>', data, re.S)
        if activities is None:
            print(f'Skipping {filename}: no Activity element found', file=sys.stderr)
            continue
        if not pieces:
            # Everything before the first activity (XML prolog and opening
            # tags) is taken once, from the first file that is merged.
            pieces.append(data.split('<Activity')[0])
        # Label each activity with its source file and date. The label is
        # XML-escaped and inserted through a function so that characters such
        # as '&' or '\' in a file name cannot corrupt the document or be
        # misread as regex replacement escapes.
        note = '<Notes>' + escape(filename.replace(".tcx", "") + " - " + start_date) + '</Notes>'
        activity = re.sub('(<Activity.*?>)', lambda m: m.group(1) + note, activities.group(0))
        if tag_pattern is not None:
            activity = _thin_tags(activity, tag_pattern)
        pieces.append(activity)

    if pieces:
        pieces.append('</Activities></TrainingCenterDatabase>')
        xml = ''.join(pieces)
        with open('everything.tcx', 'w') as f:
            f.write(xml)
    else:
        # Do not write a cache that holds nothing but closing tags: it would
        # be picked up by every later run instead of the real input files.
        print('No usable .tcx files found in the current directory', file=sys.stderr)
# --- Phase 2: shrink the combined document -----------------------------------
# Reports the size of ``xml``, derives from ``size_constraint`` which share of
# the trackpoints has to go, removes that share at random and writes the
# result to minified.tcx.

def _byte_size(text):
    """Return the length of *text* in bytes when encoded as UTF-8.

    ``sys.getsizeof`` reports the memory used by the Python string object,
    which doubles or quadruples as soon as the text holds a single
    non-Latin-1 character and therefore says little about the file size.
    """
    return len(text.encode('utf-8'))


def _removal_fraction(constraint, current_bytes):
    """Translate the configured constraint into the share of trackpoints to drop.

    A string such as ``"60%"`` is used directly as the share (0.6). A number
    is a size budget in bytes and yields ``1 - constraint / current_bytes``,
    which is zero or negative when the document already fits.
    """
    if isinstance(constraint, str):
        return float(constraint.strip().rstrip('%')) / 100
    if current_bytes == 0:
        # Nothing to shrink; also avoids dividing by zero.
        return 0.0
    return 1 - constraint / current_bytes


def _drop_trackpoints(document, fraction):
    """Return *document* with roughly *fraction* of its trackpoints removed.

    Every ``<Trackpoint>`` is dropped, together with its closing tag, with
    probability *fraction*. The remaining text is joined from slices.
    """
    closing_tag = '</Trackpoint>'
    kept = []
    cursor = 0
    for match in re.finditer('<Trackpoint>', document):
        if random.random() >= fraction:
            continue
        end = document.find(closing_tag, match.end())
        if end == -1 or match.start() < cursor:
            # Unterminated or overlapping trackpoint: leave the text alone
            # rather than cutting at a bogus position.
            continue
        # NOTE(review): guard kept verbatim from the original. It presumably
        # protects against a removal that would swallow an activity start;
        # confirm the offsets and the attribute-less '<Activity>' literal.
        if document.find('<Activity>', match.end() + 13, end + 2) != -1:
            continue
        kept.append(document[cursor:match.start()])
        cursor = end + len(closing_tag)
    kept.append(document[cursor:])
    return ''.join(kept)


combined_bytes = _byte_size(xml)
print(round(combined_bytes/1024/1024, 1), 'mb')
# The configured value is left untouched; the derived share gets its own name.
removal_fraction = _removal_fraction(size_constraint, combined_bytes)
xml = _drop_trackpoints(xml, removal_fraction)
print(f'Remove { min(100, max(0, int(removal_fraction*100))) }% of datapoints')
print(round(_byte_size(xml)/1024/1024, 1), 'mb')
with open('minified.tcx', 'w') as f:
    f.write(xml)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment