Last active
January 22, 2022 05:39
-
-
Save stampyzfanz/659e73fd8bb208fe3d597a5dfec8a587 to your computer and use it in GitHub Desktop.
Combine tcx files into one.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1 Create a file tcxcombiner.py in the directory that contains the .tcx files, with these contents (or download it)
# 2 Execute it with the command 'py tcxcombiner.py' to combine the files into one and, optionally, to minify the result
# The minification works by removing a random share of the location data points and removing 94% of the DistanceMeters, Time, and AltitudeMeters tags.
# If any of those tags should be kept, change the corresponding True below to False
# Tag families that the combine step may strip from each activity while it
# builds everything.tcx. Set a flag to False to leave that tag untouched.
remove_distance = True   # <DistanceMeters>...</DistanceMeters>
remove_time = True       # <Time>...</Time>
remove_altitude = True   # <AltitudeMeters>...</AltitudeMeters>

# Size target for minified.tcx. Two forms are accepted:
#   * a number  - a size budget; the share of trackpoints to drop is derived
#                 from how far the combined document exceeds it
#   * a string  - a percentage such as "60%", used directly as the share of
#                 trackpoints to drop
# The maximum for https://www.gpsvisualizer.com/ is 10mb
# 9 mb is below
size_constraint = 9000000
# this is what percent looks like
# size_constraint = "60%"
import os
import random
import re
import sys
# ---------------------------------------------------------------------------
# Stage 1: obtain the combined document.
# If 'everything.tcx' already exists it is reused as-is; otherwise it is built
# from the individual .tcx files in the current directory and written out.
# Afterwards `xml` holds the text of the combined document.
# ---------------------------------------------------------------------------
xml = ''
if os.path.isfile('everything.tcx'):
    # Read through the handle opened by `with` so that it is closed again
    # (a second, never-closed open() is not needed).
    with open('everything.tcx', 'r') as f:
        xml = f.read()
else:
    # Pass 1: map every input file to the date found in its first StartTime
    # attribute, so the activities can be merged in date order.
    files = {}
    for filename in os.listdir():
        # The script's own output files are not inputs.
        if not filename.endswith('.tcx') or filename in ('everything.tcx', 'minified.tcx'):
            continue
        with open(filename, 'r') as f:
            # Only the first 10000 characters are searched for StartTime.
            data = f.read(10000)
        # Capture the attribute value up to the 'T' (or the closing quote).
        dates = re.findall(r"StartTime=\"(.*?)(?:T|\")", data)
        if not dates:
            # Skip the file instead of aborting the whole run with IndexError.
            print('Skipping ' + filename + ': no StartTime attribute found',
                  file=sys.stderr)
            continue
        files[filename] = dates[0]

    # Ascending by the captured date text.
    # NOTE(review): this is a plain string sort; it is chronological only if
    # the captured text is an ISO-style date (YYYY-MM-DD) - confirm.
    files = sorted(files.items(), key=lambda item: item[1])

    # The alternation of tags to strip does not depend on the file, so it is
    # built once. It is empty when all three flags are False.
    tag_patterns = []
    if remove_distance:
        tag_patterns.append('(<DistanceMeters>.*?</DistanceMeters>)')
    if remove_time:
        tag_patterns.append('(<Time>.*?</Time>)')
    if remove_altitude:
        tag_patterns.append('(<AltitudeMeters.*?</AltitudeMeters>)')
    regex_to_remove = '|'.join(tag_patterns)

    # Pass 2: concatenate the activities of all files.
    header = None
    pieces = []
    for filename, date in files:
        with open(filename, 'r') as f:
            data = f.read()

        # With re.S the match runs from the first '<Activity' to the last
        # '</Activity>' of the file.
        found = re.search('<Activity.*</Activity>', data, re.S)
        if found is None:
            print('Skipping ' + filename + ': no <Activity> element found',
                  file=sys.stderr)
            continue
        if header is None:
            # Everything in front of the first activity of the first usable
            # file becomes the head of the combined document.
            header = data.split('<Activity')[0]
        activityData = found.group(0)

        # Label the activity with its source file name and date. The file name
        # is XML-escaped, and the replacement is a function rather than a
        # template string so that backslashes in the name are taken literally.
        label = filename.replace(".tcx", "")
        label = label.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        note = '<Notes>' + label + " - " + date + "</Notes>"
        activityData = re.sub("(<Activity.*?>)",
                              lambda tag: tag.group(1) + note, activityData)

        if regex_to_remove:
            removeParts = list(re.finditer(regex_to_remove, activityData))
            total = len(removeParts)
            # Walk the matches back to front so that cutting one out does not
            # shift the offsets of the matches still to be handled.
            for position, match in enumerate(reversed(removeParts)):
                # NOTE(review): as written, only matches within about 20 of
                # either end are candidates for removal (each one is kept with
                # probability 0.04); every match in between is kept. The
                # script's header comment speaks of removing most of these
                # tags, so the range test may be inverted - confirm the intent
                # before changing it. Behaviour is left as it was.
                if 20 < position < total - 20 or random.random() < 0.04:
                    continue
                activityData = activityData[:match.start()] + activityData[match.end():]

        pieces.append(activityData)

    # Close the elements that the header left open.
    xml = (header or '') + ''.join(pieces) + '</Activities></TrainingCenterDatabase>'
    with open('everything.tcx', 'w') as f:
        f.write(xml)
# ---------------------------------------------------------------------------
# Stage 2: drop a random share of the <Trackpoint> elements from `xml` and
# write the result to 'minified.tcx'.
# ---------------------------------------------------------------------------

# Measure the document by its encoded length. sys.getsizeof() reports the size
# of the in-memory str object, which is 2-4x larger than the text as soon as
# it contains a single non-Latin-1 character.
# NOTE(review): UTF-8 is assumed as the on-disk encoding - confirm.
size_bytes = len(xml.encode('utf-8'))
print(round(size_bytes/1024/1024, 1), 'mb')

# From here on size_constraint is the probability with which each trackpoint
# is dropped.
if isinstance(size_constraint, str):
    # Percentage form, e.g. "60%".
    size_constraint = float(size_constraint.strip().rstrip('%'))/100
elif size_bytes:
    # Size-budget form: drop the share by which the document exceeds the
    # budget. The value is negative (nothing is dropped) when it already fits.
    size_constraint = 1 - size_constraint / size_bytes
else:
    # Empty document: nothing to drop, and avoid dividing by zero.
    size_constraint = 0

close_tag = '</Trackpoint>'
# Each entry is a [start, end] pair of a slice of `xml` to keep; the gaps
# between consecutive entries are the dropped trackpoints.
indexesToKeep = [[0]]
for match in re.finditer('<Trackpoint>', xml):
    if random.random() >= size_constraint:
        continue
    endTrackpointIndex = xml.find(close_tag, match.end())
    if endTrackpointIndex == -1:
        # No closing tag from here on, so neither this nor any later
        # trackpoint can be cut out cleanly.
        break
    # NOTE(review): presumably a guard against cutting across an activity
    # boundary; it looks for the literal '<Activity>' (no attributes) inside
    # the span - confirm that this is the intended marker.
    if xml.find('<Activity>', match.end() + 13, endTrackpointIndex+2) != -1:
        continue
    indexesToKeep[-1].append(match.start())
    indexesToKeep.append([endTrackpointIndex + len(close_tag)])
indexesToKeep[-1].append(len(xml))
xml = ''.join([xml[i:j] for i, j in indexesToKeep])

print(f'Remove { min(100, max(0, int(size_constraint*100))) }% of datapoints')
print(round(len(xml.encode('utf-8'))/1024/1024, 1), 'mb')
with open('minified.tcx', 'w') as f:
    f.write(xml)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment