Skip to content

Instantly share code, notes, and snippets.

@cpelley
Created November 24, 2017 04:07
Show Gist options
  • Save cpelley/0e47ecef94bc61e4a7a8d1e4bbcafcdc to your computer and use it in GitHub Desktop.
Save cpelley/0e47ecef94bc61e4a7a8d1e4bbcafcdc to your computer and use it in GitHub Desktop.
Fetch svn file renames/moves
#!/usr/bin/env python2.7
import argparse
from difflib import SequenceMatcher
import os
import shutil
import subprocess
import tempfile
def _split_peg_revision(target):
    """Split ``URL[@REV]`` into ``(url, rev)``; ``rev`` defaults to ``HEAD``."""
    url, sep, rev = target.rpartition('@')
    # No '@' at all, or the last '@' belongs to the URL itself
    # (e.g. ``svn+ssh://user@host/repo``): no peg revision was given.
    if not sep or '/' in rev:
        return target, 'HEAD'
    return url, rev or 'HEAD'


def _summarized_paths(lines, status):
    """Return the targets of ``svn diff --summarize`` lines starting with *status*."""
    paths = []
    for line in lines:
        if not line.startswith(status):
            continue
        # Split off the status column(s) rather than stripping the status
        # letter, which would also eat a matching letter at the END of the
        # path (e.g. 'README.MD' when stripping 'D').
        fields = line.split(None, 1)
        if len(fields) == 2:
            paths.append(fields[1].strip())
    return paths


def _export_text(target, rev):
    """Return the raw content of ``target@rev`` via ``svn export``, or None.

    None is returned when the export does not produce a regular file, for
    example when the target is a directory or the export fails.
    """
    # A private scratch directory avoids reusing the name of an already
    # deleted temporary file and guarantees that the export is cleaned up.
    scratch = tempfile.mkdtemp()
    try:
        destination = os.path.join(scratch, 'export')
        subprocess.call(
            ['svn', 'export', '-q', '{}@{}'.format(target, rev), destination])
        if not os.path.isfile(destination):
            return None
        # Binary mode: the content is only compared, never decoded.
        with open(destination, 'rb') as handle:
            return handle.read()
    finally:
        shutil.rmtree(scratch, ignore_errors=True)


def _is_similar(old_text, new_text, threshold):
    """Return True when the similarity ratio of the texts exceeds *threshold*."""
    matcher = SequenceMatcher(None, old_text, new_text)
    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
    # so the expensive ratio() only runs for plausible candidates.
    return (matcher.real_quick_ratio() > threshold and
            matcher.quick_ratio() > threshold and
            matcher.ratio() > threshold)


def main(old_url, new_url, threshold):
    """Print the files that look renamed/moved between two svn targets.

    The added and deleted entries reported by ``svn diff --summarize`` are
    exported and compared pairwise; each added file is paired with the first
    remaining deleted file whose similarity ratio is greater than
    *threshold*. One ``<deleted>\\t->\\t<added>`` line is printed per pair.
    Changes that are not part of a pair are not printed.

    Args:
        old_url: Old target as ``URL[@REV]``; the revision defaults to HEAD.
        new_url: New target as ``URL[@REV]``; the revision defaults to HEAD.
        threshold: Similarity fraction above which two files are paired.

    Raises:
        subprocess.CalledProcessError: If ``svn diff`` exits with an error.
    """
    old_target, old_rev = _split_peg_revision(old_url)
    new_target, new_rev = _split_peg_revision(new_url)

    # Argument lists (no shell) keep URLs and paths containing spaces or
    # shell metacharacters intact.
    summary = subprocess.check_output(
        ['svn', 'diff',
         '--old', '{}@{}'.format(old_target, old_rev),
         '--new', '{}@{}'.format(new_target, new_rev),
         '--summarize'],
        universal_newlines=True).splitlines()

    added_paths = _summarized_paths(summary, 'A')
    # Deleted files that have not been paired with an added file yet.
    unmatched = _summarized_paths(summary, 'D')

    # Content of each deleted file, exported at most once (None: not a file).
    deleted_texts = {}
    pairing = {}
    for afile in added_paths:
        added_text = _export_text(afile, new_rev)
        if added_text is None:
            continue
        for index, dfile in enumerate(unmatched):
            if dfile not in deleted_texts:
                deleted_texts[dfile] = _export_text(dfile, old_rev)
            deleted_text = deleted_texts[dfile]
            if deleted_text is None:
                continue
            if _is_similar(deleted_text, added_text, threshold):
                pairing[dfile] = afile
                # Remove the match so that it doesn't contribute to the next
                # search; safe because the loop is left immediately.
                del unmatched[index]
                deleted_texts.pop(dfile, None)
                break

    for old_path, new_path in pairing.items():
        print('{}\t->\t{}'.format(old_path, new_path))
if __name__ == '__main__':
    # Command-line entry point: parse the two svn targets and the optional
    # similarity threshold, then report the detected renames/moves.
    description = ('List all files that are identified as being renamed/moved '
                   'using a heuristic for determining percentage similarity.')
    threshold_help = ('Threshold, defaults to 0.7 when not specified. '
                      'Similarities between files greater than this fraction '
                      'are flagged as a rename/move.')

    cli = argparse.ArgumentParser(description=description)
    for positional, usage in (('old', 'OLD-URL[@OLDREV]'),
                              ('new', 'NEW-URL[@NEWREV]')):
        cli.add_argument(positional, help=usage)
    cli.add_argument('--threshold', type=float, default=0.7,
                     help=threshold_help)

    options = cli.parse_args()
    main(options.old, options.new, options.threshold)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment