tuck1s/rclone-stats.py

## rclone-stats.py
#!/usr/bin/env python3
import re, sys, csv, argparse
from datetime import datetime
myTimezone = '' # does rclone log in locale timezone?

def perror(str):
    """Write the given message (any printable object) to standard error, newline-terminated."""
    # NOTE: the parameter name shadows the builtin `str`; it is kept as-is so
    # that existing callers are unaffected.
    sys.stderr.write('%s\n' % (str,))


# Function operates "in place" by reference on dict d, i.e. has side-effects on d
def rescaleUnits(d, field, unit):
    """Convert a human-readable quantity stored in dict d into plain base units.

    d[field] holds the numeric value as a string (e.g. '337.485') and d[unit]
    holds its unit string (e.g. 'GiB' or 'KiB/s'). Both entries are rewritten
    in place: d[field] becomes a float multiplied by the binary-prefix factor
    and rounded to two decimal places; d[unit] loses its two-character prefix
    ('GiB' -> 'B', 'KiB/s' -> 'B/s').

    Raises ValueError if the value is not a valid number or if the unit neither
    carries a known binary prefix nor starts with 'B'. d is left unmodified
    when an error is raised.
    """
    # Binary (IEC) prefixes and their multipliers. Note that `**` is the power
    # operator; `^` is bitwise XOR and must not be used here.
    scales = {
        'Ki': 2 ** 10,
        'Mi': 2 ** 20,
        'Gi': 2 ** 30,
        'Ti': 2 ** 40,
        'Pi': 2 ** 50,
        'Ei': 2 ** 60,
    }
    # retype data from string to float
    value = float(d[field])
    unitStr = d[unit]
    prefix = unitStr[:2]
    if prefix in scales:
        value *= scales[prefix]
        unitStr = unitStr[2:] # trim the scaling
    elif not unitStr.startswith('B'):
        raise ValueError('Unknown unit: ' + unitStr)
    # Only write back once everything has been validated
    d[field] = round(value, 2) # only need two decimal places
    d[unit] = unitStr


if __name__ == '__main__':
    # Command-line entry point: read one or more rclone log files and write a
    # single CSV stream to stdout. One row is emitted per "Transferred:" line
    # that carries units, joined with the most recent timestamp line and the
    # most recent "Checks:" line seen in the same file.
    parser = argparse.ArgumentParser(description='Process human-readable default rclone log files into a .CSV file.')
    parser.add_argument('files', metavar='file', type=argparse.FileType('r'), nargs='+', help='log filename')
    args = parser.parse_args()

    # TODO - compare against rclone format-string outputs, timezone definition etc
    # Example -
    # "2022/11/10 12:02:34 INFO  : "
    timestampRe = re.compile(r'^(?P<date>\d+/\d+/\d+)\s+(?P<time>\d+:\d+:\d+)\sINFO')

    # Example -
    # "Transferred:   	  337.485 GiB / 339.789 GiB, 99%, 996.918 KiB/s, ETA 40m23s"
    transferredRe = re.compile(
        r'^Transferred:\s+(?P<tx_progress>\d+[\.]*\d*)\s+(?P<tx_progress_unit>[A-Za-z]+)'
        r'\s+\/\s+(?P<tx_target>\d+[\.]*\d*)\s+(?P<tx_target_unit>[A-Za-z]+)'
        r'\,\s*(?P<tx_progress_percent>\d+)\%'
        r'\,\s*(?P<tx_throughput>\d+[\.]*\d*)\s+(?P<tx_throughput_unit>[A-Za-z]+\/s)'
        r'\,\s*ETA\s*(?P<tx_ETA>[A-Za-z0-9]+)'
    )

    # Example -
    # "Checks:             27284 / 30289, 90%"
    checksRe = re.compile(r'^Checks:\s+(?P<checks>\d+)\s+\/\s+(?P<checks_denom>\d+)')

    # The column set is fixed up front. Deriving it from the first row breaks
    # as soon as a later row carries "checks" columns the first row lacked:
    # csv.DictWriter raises ValueError on unknown keys and the row is lost.
    fieldnames = [
        'datetime',
        'checks', 'checks_denom',
        'tx_progress', 'tx_progress_unit',
        'tx_target', 'tx_target_unit',
        'tx_progress_percent',
        'tx_throughput', 'tx_throughput_unit',
        'tx_ETA',
    ]
    # restval='' leaves the checks columns empty until a "Checks:" line is seen
    outfile = csv.DictWriter(sys.stdout, fieldnames=fieldnames, restval='')
    headerWritten = False # header is emitted once, just before the first data row

    for f in args.files:
        with f: # close each input once it has been consumed
            timestamp = None # datetime of the most recent timestamp line in this file
            checks = {}      # fields of the most recent "Checks:" line in this file
            for line in f:

                m = timestampRe.search(line)
                if m:
                    datetimeStr = m.group('date').replace('/', '-') + 'T' + m.group('time') + myTimezone
                    try:
                        timestamp = datetime.fromisoformat(datetimeStr)
                    except ValueError as err:
                        # keep the previous timestamp if this one cannot be parsed
                        perror(err)
                    continue

                m = transferredRe.search(line)
                if m:
                    if timestamp is None:
                        perror('No timestamp seen yet, skipping line: ' + line.rstrip())
                        continue
                    tx = m.groupdict()
                    try:
                        # map "human-readable" units back to simple values
                        rescaleUnits(tx, 'tx_progress', 'tx_progress_unit')
                        rescaleUnits(tx, 'tx_target', 'tx_target_unit')
                        rescaleUnits(tx, 'tx_throughput', 'tx_throughput_unit')
                    except ValueError as err:
                        perror(err)
                        continue

                    # join datetime (from the INFO line) with data from the Transferred: line
                    outData = {'datetime': timestamp.isoformat()}
                    outData.update(checks)
                    outData.update(tx)
                    if not headerWritten:
                        outfile.writeheader()
                        headerWritten = True
                    outfile.writerow(outData)
                    continue

                m = checksRe.search(line)
                if m:
                    checks = m.groupdict()
                    continue
	#!/usr/bin/env python3
	import re, sys, csv, argparse
	from datetime import datetime
	myTimezone = '' # does rclone log in locale timezone?

	def perror(str):
	    """Print the given message (any printable object) to standard error."""
	    # NOTE: the parameter name shadows the builtin `str`; it is kept as-is so
	    # that existing callers are unaffected.
	    print(str, file=sys.stderr)


	# Function operates "in place" by reference on dict d, i.e. has side-effects on d
	def rescaleUnits(d, field, unit):
	    """Convert a human-readable quantity stored in dict d into plain base units.

	    d[field] holds the numeric value as a string (e.g. '337.485') and d[unit]
	    holds its unit string (e.g. 'GiB' or 'KiB/s'). Both entries are rewritten
	    in place: d[field] becomes a float multiplied by the binary-prefix factor
	    and rounded to two decimal places; d[unit] loses its two-character prefix
	    ('GiB' -> 'B', 'KiB/s' -> 'B/s').

	    Raises ValueError if the value is not a valid number or if the unit neither
	    carries a known binary prefix nor starts with 'B'. d is left unmodified
	    when an error is raised.
	    """
	    # Binary (IEC) prefixes and their multipliers. Note that `**` is the power
	    # operator; `^` is bitwise XOR and must not be used here.
	    scales = {
	        'Ki': 2 ** 10,
	        'Mi': 2 ** 20,
	        'Gi': 2 ** 30,
	        'Ti': 2 ** 40,
	        'Pi': 2 ** 50,
	        'Ei': 2 ** 60,
	    }
	    # retype data from string to float
	    value = float(d[field])
	    unitStr = d[unit]
	    prefix = unitStr[:2]
	    if prefix in scales:
	        value *= scales[prefix]
	        unitStr = unitStr[2:] # trim the scaling
	    elif not unitStr.startswith('B'):
	        raise ValueError('Unknown unit: ' + unitStr)
	    # Only write back once everything has been validated
	    d[field] = round(value, 2) # only need two decimal places
	    d[unit] = unitStr


	if __name__ == '__main__':
	    # Command-line entry point: read one or more rclone log files and write a
	    # single CSV stream to stdout. One row is emitted per "Transferred:" line
	    # that carries units, joined with the most recent timestamp line and the
	    # most recent "Checks:" line seen in the same file.
	    parser = argparse.ArgumentParser(description='Process human-readable default rclone log files into a .CSV file.')
	    parser.add_argument('files', metavar='file', type=argparse.FileType('r'), nargs='+', help='log filename')
	    args = parser.parse_args()

	    # TODO - compare against rclone format-string outputs, timezone definition etc
	    # Example -
	    # "2022/11/10 12:02:34 INFO : "
	    timestampRe = re.compile(r'^(?P<date>\d+/\d+/\d+)\s+(?P<time>\d+:\d+:\d+)\sINFO')

	    # Example -
	    # "Transferred: 337.485 GiB / 339.789 GiB, 99%, 996.918 KiB/s, ETA 40m23s"
	    # The `*` quantifiers are required: without them only values with exactly
	    # one decimal digit would match.
	    transferredRe = re.compile(
	        r'^Transferred:\s+(?P<tx_progress>\d+[\.]*\d*)\s+(?P<tx_progress_unit>[A-Za-z]+)'
	        r'\s+\/\s+(?P<tx_target>\d+[\.]*\d*)\s+(?P<tx_target_unit>[A-Za-z]+)'
	        r'\,\s*(?P<tx_progress_percent>\d+)\%'
	        r'\,\s*(?P<tx_throughput>\d+[\.]*\d*)\s+(?P<tx_throughput_unit>[A-Za-z]+\/s)'
	        r'\,\s*ETA\s*(?P<tx_ETA>[A-Za-z0-9]+)'
	    )

	    # Example -
	    # "Checks: 27284 / 30289, 90%"
	    checksRe = re.compile(r'^Checks:\s+(?P<checks>\d+)\s+\/\s+(?P<checks_denom>\d+)')

	    # The column set is fixed up front. Deriving it from the first row breaks
	    # as soon as a later row carries "checks" columns the first row lacked:
	    # csv.DictWriter raises ValueError on unknown keys and the row is lost.
	    fieldnames = [
	        'datetime',
	        'checks', 'checks_denom',
	        'tx_progress', 'tx_progress_unit',
	        'tx_target', 'tx_target_unit',
	        'tx_progress_percent',
	        'tx_throughput', 'tx_throughput_unit',
	        'tx_ETA',
	    ]
	    # restval='' leaves the checks columns empty until a "Checks:" line is seen
	    outfile = csv.DictWriter(sys.stdout, fieldnames=fieldnames, restval='')
	    headerWritten = False # header is emitted once, just before the first data row

	    for f in args.files:
	        with f: # close each input once it has been consumed
	            timestamp = None # datetime of the most recent timestamp line in this file
	            checks = {}      # fields of the most recent "Checks:" line in this file
	            for line in f:

	                m = timestampRe.search(line)
	                if m:
	                    datetimeStr = m.group('date').replace('/', '-') + 'T' + m.group('time') + myTimezone
	                    try:
	                        timestamp = datetime.fromisoformat(datetimeStr)
	                    except ValueError as err:
	                        # keep the previous timestamp if this one cannot be parsed
	                        perror(err)
	                    continue

	                m = transferredRe.search(line)
	                if m:
	                    if timestamp is None:
	                        perror('No timestamp seen yet, skipping line: ' + line.rstrip())
	                        continue
	                    tx = m.groupdict()
	                    try:
	                        # map "human-readable" units back to simple values
	                        rescaleUnits(tx, 'tx_progress', 'tx_progress_unit')
	                        rescaleUnits(tx, 'tx_target', 'tx_target_unit')
	                        rescaleUnits(tx, 'tx_throughput', 'tx_throughput_unit')
	                    except ValueError as err:
	                        perror(err)
	                        continue

	                    # join datetime (from the INFO line) with data from the Transferred: line
	                    outData = {'datetime': timestamp.isoformat()}
	                    outData.update(checks)
	                    outData.update(tx)
	                    if not headerWritten:
	                        outfile.writeheader()
	                        headerWritten = True
	                    outfile.writerow(outData)
	                    continue

	                m = checksRe.search(line)
	                if m:
	                    checks = m.groupdict()
	                    continue