Skip to content

Instantly share code, notes, and snippets.

@u1735067
Last active April 19, 2018 18:12
Show Gist options
  • Save u1735067/962b8b2c040451a5c008be307adcaed9 to your computer and use it in GitHub Desktop.
Save u1735067/962b8b2c040451a5c008be307adcaed9 to your computer and use it in GitHub Desktop.
snapraid dup -l dups.log > /dev/null && snapraid-dup-log2csv.py dups.log dups.csv ';' && rm dups.log
#!/usr/bin/env python3
import sys, errno, csv
# Both the input log path and the output CSV path are mandatory; the CSV
# delimiter (third argument) is optional. Bail out early with a usage line
# that spells out the expected arguments.
if len(sys.argv) < 3:
    print(
        'usage: {} <snapraid-dup-log> <output-csv> [delimiter]'.format(sys.argv[0]),
        file=sys.stderr,
    )
    sys.exit(errno.EINVAL)
def size_to_human(size, formatted_number=True):
    """Scale a byte count to the largest fitting binary unit.

    The value is divided by 1024 repeatedly until it drops below 1024 or
    the largest known unit (YiB) is reached.

    Args:
        size: Number of bytes.
        formatted_number: When true (the default), the scaled value is
            returned as a string with at most two decimals and no
            superfluous trailing zeros (3.00 -> '3', 3.141 -> '3.14').
            When false, the scaled number itself is returned.

    Returns:
        A ``(value, unit)`` tuple, e.g. ``('1.5', 'KiB')``.
    """
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    largest_unit = units[-1]

    unit = units[0]
    for unit in units:
        # Stop as soon as the value fits, or when there is no bigger unit.
        if size < 1024 or unit == largest_unit:
            break
        size /= 1024

    if not formatted_number:
        return (size, unit)

    # Trim trailing zeros first, then a dangling decimal point - see
    # https://stackoverflow.com/questions/2440692/formatting-floats-in-python-without-superfluous-zeros
    text = format(size, '.2f')
    text = text.rstrip('0')
    text = text.rstrip('.')
    return (text, unit)
# Collect every "dup" record from the log file given as first argument.
# Each line is split on ':'; only lines whose first field is "dup" are kept,
# and fields 1-5 are read as source1, file1, source2, file2 and size.
dups = []
dups_size = 0
with open(sys.argv[1], 'r', encoding='utf-8') as log_file:
    for line in log_file:
        sline = line.split(':')
        if sline[0] != 'dup':
            continue
        # Strip stray whitespace so a trailing newline can never end up
        # inside the CSV cell.
        size = sline[5].strip()
        dups.append({
            'source1': sline[1],
            'file1': sline[2],
            'source2': sline[3],
            'file2': sline[4],
            'size': size
        })
        dups_size += int(size)

if not dups:
    print('No dups in the log file', file=sys.stderr)
    sys.exit(errno.ENOMSG)

print('{} duplicates found ({} {})'.format(len(dups), *size_to_human(dups_size)), file=sys.stderr)

# Largest duplicates first.
dups.sort(key=(lambda x: int(x['size'])), reverse=True)

# The csv module rejects delimiter=None with a TypeError, so fall back to a
# comma when no third command-line argument was supplied.
delimiter = sys.argv[3] if len(sys.argv) > 3 else ','

# 'utf-8-sig' prepends a BOM to the output (presumably so spreadsheet tools
# detect the encoding); newline='' is what the csv module expects for files
# it writes to.
with open(sys.argv[2], 'w', encoding='utf-8-sig', newline='') as output:
    writer = csv.DictWriter(output, fieldnames=list(dups[0].keys()), delimiter=delimiter)
    writer.writeheader()
    writer.writerows(dups)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment