# stampyzfanz/tcxcombiner.py

## tcxcombiner.py
# 1 Create a file tcxcombiner.py with these contents (or download it) in the directory that holds your .tcx files
# 2 Execute it with the command 'py tcxcombiner.py' in order to combine the files into one, and to optionally minify it

# The minification works by removing every x location data point and removing 94% of the distanceMeters, time, and altitude tags.
# If any of them are desired, edit the following Trues to False

# Switches for the per-trackpoint tags that the combine step may strip.
# Set a switch to False to keep that kind of tag in the output.
remove_distance = True
remove_time = True
remove_altitude = True

# Size budget for the minified file. An integer is a byte count: the
# fraction of trackpoints dropped is 1 - size_constraint / current size.
# https://www.gpsvisualizer.com/ accepts at most 10mb, so aim for 9 mb.
size_constraint = 9_000_000
# A string ending in "%" is used directly as the share of trackpoints to drop:
# size_constraint = "60%"

import random
import re
import sys
import os

# ---------------------------------------------------------------------------
# Step 1: obtain the combined document.
#
# Every file is read and written explicitly as UTF-8 rather than with the
# platform default encoding (cp1252 on Windows), so non-ASCII text survives
# the round trip unchanged.
# ---------------------------------------------------------------------------
xml = ''
if os.path.isfile('everything.tcx'):
    # Reuse the result of a previous run. Delete everything.tcx to force the
    # individual .tcx files to be combined again.
    with open('everything.tcx', 'r', encoding='utf-8') as f:
        xml = f.read()
else:
    # Record the first StartTime attribute of every source file so that the
    # activities can be appended in chronological order.
    start_times = {}
    for filename in os.listdir():
        if not filename.endswith('.tcx') or filename in ('everything.tcx', 'minified.tcx'):
            continue

        with open(filename, 'r', encoding='utf-8') as f:
            # The first StartTime is expected near the top of the file.
            head = f.read(10000)

        found = re.search(r'StartTime="(.*?)"', head)
        if found is None:
            # Previously an IndexError; one odd file should not abort the run.
            print('Skipping', filename, '- no StartTime in its first 10000 characters')
            continue
        start_times[filename] = found.group(1)

    # Sort on the full timestamp (not only the date) so that several
    # activities from the same day keep their real order; the file name
    # breaks ties so the result does not depend on os.listdir() order.
    ordered = sorted(start_times.items(), key=lambda item: (item[1], item[0]))

    # Build the tag pattern once. With every remove_* flag switched off there
    # is nothing to strip; an empty pattern would match at every position.
    tag_patterns = []
    if remove_distance:
        tag_patterns.append('(<DistanceMeters>.*?</DistanceMeters>)')
    if remove_time:
        tag_patterns.append('(<Time>.*?</Time>)')
    if remove_altitude:
        tag_patterns.append('(<AltitudeMeters.*?</AltitudeMeters>)')
    tag_regex = re.compile('|'.join(tag_patterns)) if tag_patterns else None

    pieces = []
    for filename, stamp in ordered:
        with open(filename, 'r', encoding='utf-8') as f:
            data = f.read()

        found = re.search('<Activity.*</Activity>', data, re.S)
        if found is None:
            print('Skipping', filename, '- no <Activity> element found')
            continue
        activityData = found.group(0)

        if not pieces:
            # Everything in front of the first activity is taken from the
            # first usable file only.
            pieces.append(data.split('<Activity')[0])

        # Label each activity with its source file name and start date.
        # The name is XML-escaped so characters such as "&" cannot break the
        # document, and a function replacement is used so backslashes in the
        # name are not interpreted as regex template escapes.
        label = filename.replace('.tcx', '')
        label = label.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        note = '<Notes>' + label + ' - ' + stamp.split('T')[0] + '</Notes>'
        activityData = re.sub('(<Activity.*?>)',
                              lambda opening: opening.group(1) + note,
                              activityData)

        if tag_regex is not None:
            removeParts = list(tag_regex.finditer(activityData))
            drop_spans = []
            for i, match in enumerate(reversed(removeParts)):
                # Keep the tags within ~20 matches of either end and a random
                # 4% of the rest; everything else is dropped.
                # NOTE(review): the original condition was inverted (it kept
                # the whole middle and thinned only the ~42 edge tags), which
                # contradicts the header comment about removing most tags.
                # Presumably the edges are kept so the start and end of an
                # activity retain their values - TODO confirm.
                near_edge = not (20 < i < len(removeParts) - 20)
                if near_edge or random.random() < 0.04:
                    continue
                drop_spans.append(match.span())

            # drop_spans was collected back to front; rebuild the text front
            # to back in a single pass instead of re-slicing the whole
            # string for every removed tag.
            kept = []
            cursor = 0
            for start, end in reversed(drop_spans):
                kept.append(activityData[cursor:start])
                cursor = end
            kept.append(activityData[cursor:])
            activityData = ''.join(kept)

        pieces.append(activityData)

    if not pieces:
        # Writing an empty everything.tcx would be silently reused by every
        # later run, so stop instead.
        sys.exit('No usable .tcx files found in the current directory')

    xml = ''.join(pieces) + '</Activities></TrainingCenterDatabase>'


with open('everything.tcx', 'w', encoding='utf-8') as f:
    f.write(xml)

# ---------------------------------------------------------------------------
# Step 2: drop random trackpoints until the size budget is (roughly) met.
# ---------------------------------------------------------------------------
# Measure the encoded size, i.e. what ends up on disk. sys.getsizeof()
# reports the in-memory object size, which is not the file size.
size_bytes = len(xml.encode('utf-8'))
print(round(size_bytes/1024/1024, 1), 'mb')

# From here on size_constraint is the probability of dropping a trackpoint.
if isinstance(size_constraint, str):
    # "60%" -> 0.6
    size_constraint = float(size_constraint.strip().rstrip('%')) / 100
else:
    # Byte budget -> share of the document that has to go (<= 0 if it fits).
    size_constraint = 1 - size_constraint / size_bytes if size_bytes else 0

open_tag_pattern = '<Trackpoint>'
close_tag = '</Trackpoint>'
kept_chunks = []
cursor = 0
for match in re.finditer(open_tag_pattern, xml):
    if random.random() >= size_constraint:
        continue

    endTrackpointIndex = xml.find(close_tag, match.end())
    if endTrackpointIndex == -1:
        # Unterminated trackpoint: leave it alone rather than cut at a
        # meaningless offset.
        continue

    # Carried over unchanged from the original: skip the trackpoint if a
    # literal '<Activity>' shows up before its closing tag (presumably a
    # guard against cutting across an activity boundary - TODO confirm).
    if xml.find('<Activity>', match.end() + 13, endTrackpointIndex+2) != -1:
        continue

    kept_chunks.append(xml[cursor:match.start()])
    cursor = endTrackpointIndex + len(close_tag)

kept_chunks.append(xml[cursor:])
xml = ''.join(kept_chunks)

print(f'Remove { min(100, max(0, int(size_constraint*100))) }% of datapoints')

print(round(len(xml.encode('utf-8'))/1024/1024, 1), 'mb')

with open('minified.tcx', 'w', encoding='utf-8') as f:
    f.write(xml)
	# 1 Create a file tcxcombiner.py with these contents (or download it) in the directory that holds your .tcx files
	# 2 Execute it with the command 'py tcxcombiner.py' in order to combine the files into one, and to optionally minify it

	# The minification works by removing every x location data point and removing most of the distanceMeters, time, and altitude tags.
	# If any of them are desired, edit the following Trues to False

	remove_distance = True
	remove_time = True
	remove_altitude = True

	# The maximum for https://www.gpsvisualizer.com/ is 10mb
	# 9 mb is below
	size_constraint = 9000000
	# this is what percent looks like
	# size_constraint = "60%"

	import random
	import re
	import sys
	import os

	# Step 1: obtain the combined document. Every file is read and written
	# explicitly as UTF-8 rather than with the platform default encoding.
	xml = ''
	if os.path.isfile('everything.tcx'):
	    # Reuse the result of a previous run. Delete everything.tcx to force
	    # the individual .tcx files to be combined again.
	    with open('everything.tcx', 'r', encoding='utf-8') as f:
	        xml = f.read()
	else:
	    # Record the first StartTime attribute of every source file so that
	    # the activities can be appended in chronological order.
	    start_times = {}
	    for filename in os.listdir():
	        if not filename.endswith('.tcx') or filename in ('everything.tcx', 'minified.tcx'):
	            continue

	        with open(filename, 'r', encoding='utf-8') as f:
	            # The first StartTime is expected near the top of the file.
	            head = f.read(10000)

	        found = re.search(r'StartTime="(.*?)"', head)
	        if found is None:
	            # One odd file should not abort the whole run.
	            print('Skipping', filename, '- no StartTime in its first 10000 characters')
	            continue
	        start_times[filename] = found.group(1)

	    # Sort on the full timestamp; the file name breaks ties so the result
	    # does not depend on os.listdir() order.
	    ordered = sorted(start_times.items(), key=lambda item: (item[1], item[0]))

	    # Build the tag pattern once. The alternatives are joined with a real
	    # regex "|"; with every remove_* flag off there is nothing to strip.
	    tag_patterns = []
	    if remove_distance:
	        tag_patterns.append('(<DistanceMeters>.*?</DistanceMeters>)')
	    if remove_time:
	        tag_patterns.append('(<Time>.*?</Time>)')
	    if remove_altitude:
	        tag_patterns.append('(<AltitudeMeters.*?</AltitudeMeters>)')
	    tag_regex = re.compile('|'.join(tag_patterns)) if tag_patterns else None

	    pieces = []
	    for filename, stamp in ordered:
	        with open(filename, 'r', encoding='utf-8') as f:
	            data = f.read()

	        found = re.search('<Activity.*</Activity>', data, re.S)
	        if found is None:
	            print('Skipping', filename, '- no <Activity> element found')
	            continue
	        activityData = found.group(0)

	        if not pieces:
	            # Everything in front of the first activity is taken from the
	            # first usable file only.
	            pieces.append(data.split('<Activity')[0])

	        # Label each activity with its source file name and start date.
	        # The name is XML-escaped, and a function replacement is used so
	        # backslashes in it are not read as regex template escapes.
	        label = filename.replace('.tcx', '')
	        label = label.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
	        note = '<Notes>' + label + ' - ' + stamp.split('T')[0] + '</Notes>'
	        activityData = re.sub('(<Activity.*?>)',
	                              lambda opening: opening.group(1) + note,
	                              activityData)

	        if tag_regex is not None:
	            removeParts = list(tag_regex.finditer(activityData))
	            drop_spans = []
	            for i, match in enumerate(reversed(removeParts)):
	                # Keep the tags within ~20 matches of either end and a
	                # random 4% of the rest; everything else is dropped.
	                # NOTE(review): the original condition was inverted (it
	                # kept the whole middle and thinned only the edge tags),
	                # contradicting the header comment - TODO confirm intent.
	                near_edge = not (20 < i < len(removeParts) - 20)
	                if near_edge or random.random() < 0.04:
	                    continue
	                drop_spans.append(match.span())

	            # drop_spans was collected back to front; rebuild the text
	            # front to back in a single pass.
	            kept = []
	            cursor = 0
	            for start, end in reversed(drop_spans):
	                kept.append(activityData[cursor:start])
	                cursor = end
	            kept.append(activityData[cursor:])
	            activityData = ''.join(kept)

	        pieces.append(activityData)

	    if not pieces:
	        # An empty everything.tcx would be silently reused by later runs.
	        sys.exit('No usable .tcx files found in the current directory')

	    xml = ''.join(pieces) + '</Activities></TrainingCenterDatabase>'


	with open('everything.tcx', 'w', encoding='utf-8') as f:
	    f.write(xml)

	# Step 2: drop random trackpoints until the size budget is (roughly) met.
	# Measure the encoded size, i.e. what ends up on disk; sys.getsizeof()
	# reports the in-memory object size, which is not the file size.
	size_bytes = len(xml.encode('utf-8'))
	print(round(size_bytes/1024/1024, 1), 'mb')

	# From here on size_constraint is the probability of dropping a trackpoint.
	if isinstance(size_constraint, str):
	    # "60%" -> 0.6
	    size_constraint = float(size_constraint.strip().rstrip('%')) / 100
	else:
	    # Byte budget -> share of the document that has to go (<= 0 if it fits).
	    size_constraint = 1 - size_constraint / size_bytes if size_bytes else 0

	open_tag_pattern = '<Trackpoint>'
	close_tag = '</Trackpoint>'
	kept_chunks = []
	cursor = 0
	for match in re.finditer(open_tag_pattern, xml):
	    if random.random() >= size_constraint:
	        continue

	    endTrackpointIndex = xml.find(close_tag, match.end())
	    if endTrackpointIndex == -1:
	        # Unterminated trackpoint: leave it alone.
	        continue

	    # Carried over unchanged: skip the trackpoint if a literal
	    # '<Activity>' shows up before its closing tag (presumably a guard
	    # against cutting across an activity boundary - TODO confirm).
	    if xml.find('<Activity>', match.end() + 13, endTrackpointIndex+2) != -1:
	        continue

	    kept_chunks.append(xml[cursor:match.start()])
	    cursor = endTrackpointIndex + len(close_tag)

	kept_chunks.append(xml[cursor:])
	xml = ''.join(kept_chunks)

	print(f'Remove { min(100, max(0, int(size_constraint*100))) }% of datapoints')

	print(round(len(xml.encode('utf-8'))/1024/1024, 1), 'mb')

	with open('minified.tcx', 'w', encoding='utf-8') as f:
	    f.write(xml)