Skip to content

Instantly share code, notes, and snippets.

@thomaspatzke
Created January 30, 2016 22:29
Show Gist options
  • Save thomaspatzke/a5edfb8f2cfc4552f509 to your computer and use it in GitHub Desktop.
Save thomaspatzke/a5edfb8f2cfc4552f509 to your computer and use it in GitHub Desktop.
Create a file deletion script from two 'openssl sha1' outputs. The generated script deletes the files listed in the source hash file that have a duplicate (same hash) in the destination hash file.
#!/usr/bin/python3
import re
import shlex
from sys import argv, exit
# Matches one line of `openssl sha1` output: "SHA1(<filename>)= <digest>".
# Group 1 is the file name, group 2 the digest.
hashline_re = re.compile(r'^SHA1\((.*?)\)= (.*)$')


def load_destination_hashes(lines):
    """Index the destination hash listing by digest.

    Args:
        lines: iterable of text lines in `openssl sha1` output format.

    Returns:
        A tuple ``(hashes, stats)``. ``hashes`` maps each digest to the first
        file name seen with it. ``stats`` is ``(lines_read, matched,
        unrecognized, duplicates)``.

    Progress messages (duplicates inside the destination listing and
    unparseable lines) are printed to stdout.
    """
    hashes = {}
    lcnt = mcnt = ncnt = dcnt = 0
    for line in lines:
        lcnt += 1
        match = hashline_re.search(line)
        if not match:
            ncnt += 1
            # Show the offending line; the trailing newline is dropped so the
            # message stays on a single output line.
            print("Line '{}' not recognized".format(line.rstrip("\r\n")))
            continue
        mcnt += 1
        fname = match.group(1)
        fhash = match.group(2)
        if fhash in hashes:
            print("Duplicate file in destination. File {} equals {}, hash={}".format(fname, hashes[fhash], fhash))
            dcnt += 1
        else:
            hashes[fhash] = fname
    return hashes, (lcnt, mcnt, ncnt, dcnt)


def build_deletion_script(lines, dsthashes, prefix=""):
    """Build the shell script that removes source files present in destination.

    Args:
        lines: iterable of text lines of the source hash listing.
        dsthashes: mapping of digest -> destination file name.
        prefix: string prepended to every source file name in the ``rm``
            command.

    Returns:
        A tuple ``(script_lines, stats)``. ``script_lines`` is the list of
        script lines without trailing newlines, starting with the shebang.
        ``stats`` is ``(lines_read, matched, unrecognized, duplicates)``.

    Source files without a counterpart in ``dsthashes`` and unparseable lines
    are reported on stdout and produce no script output.
    """
    script = ["#!/bin/sh"]
    lcnt = mcnt = ncnt = dcnt = 0
    for line in lines:
        lcnt += 1
        match = hashline_re.search(line)
        if not match:
            ncnt += 1
            print("Line '{}' not recognized".format(line.rstrip("\r\n")))
            continue
        mcnt += 1
        fname = match.group(1)
        fhash = match.group(2)
        if fhash not in dsthashes:
            print("File '{}' (hash: {}) has no duplicate in destination".format(fname, fhash))
            continue
        dcnt += 1
        script.append("# duplicate: src={} (deleted), dst={}, hash={}".format(fname, dsthashes[fhash], fhash))
        # Quote the path so spaces and shell metacharacters in file names
        # cannot break or hijack the script; "--" stops option parsing so a
        # name starting with "-" is still treated as a file.
        script.append("rm -- {}".format(shlex.quote(prefix + fname)))
    return script, (lcnt, mcnt, ncnt, dcnt)


def main(args):
    """Run the tool with an ``argv``-style argument list.

    Args:
        args: ``[program, srchashes, dsthashes, output_script[, prefix]]``.

    Returns:
        Process exit status: 0 on success, 1 on usage error, 2 on I/O error.
    """
    if len(args) < 4:
        print("Usage: " + args[0] + " <srchashes> <dsthashes> <output-script> [<prefix>]")
        return 1
    prefix = args[4] if len(args) > 4 else ""

    summary = "Read {} lines, {} matched, {} not. Found {} duplicates in destination"
    try:
        # All three files are opened up front so a missing or unwritable path
        # is reported before any processing output; the context manager
        # closes every handle that was opened, including on failure.
        with open(args[1], "r") as hsrc, \
                open(args[2], "r") as hdst, \
                open(args[3], "w") as outscript:
            dsthashes, stats = load_destination_hashes(hdst)
            print(summary.format(*stats))
            script, stats = build_deletion_script(hsrc, dsthashes, prefix)
            outscript.write("\n".join(script) + "\n")
            print(summary.format(*stats))
    except IOError as e:
        print(e)
        return 2
    return 0


if __name__ == "__main__":
    exit(main(argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment