Skip to content

Instantly share code, notes, and snippets.

@zbraniecki
Last active August 29, 2015 14:23
Show Gist options
  • Save zbraniecki/605042399d296ee537b0 to your computer and use it in GitHub Desktop.
Save zbraniecki/605042399d296ee537b0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import random
import hglib
import json
import subprocess
import re
# Command used by check() for the Python compare-locales run. It is a bare
# name, so it is presumably resolved through PATH -- confirm on your setup.
PY_COMPARE = 'compare-dirs'
# Command used by check() for the compare-locales.js run. This is a
# machine-specific absolute path and will need adjusting on other hosts.
JS_COMPARE = '/src/l10n/slave-ball/vendor-local/compare-locales.js/bin/compare-dirs.js'
def compare_details(det1, det2):
    """Recursively compare two decoded JSON ``details`` structures.

    ``check`` passes the Python compare-locales result as ``det1`` and the
    compare-locales.js result as ``det2``.  A few differences between the
    two tools are tolerated (see the helpers below); anything else raises.

    Neither argument is modified.

    :param det1: dict, list, string or scalar from the first tool.
    :param det2: the corresponding value from the second tool.
    :raises Exception: describing the first difference that was found.
    """
    if type(det1) != type(det2):
        raise Exception('Type mismatch between "%s" and "%s"' % (det1, det2))
    if isinstance(det1, dict):
        _compare_detail_dicts(det1, det2)
    elif isinstance(det1, list):
        if len(det1) != len(det2):
            raise Exception('Different length between "%s" and "%s"' % (det1, det2))
        for item1, item2 in zip(det1, det2):
            compare_details(item1, item2)
    elif isinstance(det1, (type(''), type(u''))):
        # (type(''), type(u'')) is (str, unicode) on Python 2 and (str, str)
        # on Python 3, so the ``unicode`` name is never needed.
        if det1 != det2:
            _compare_detail_messages(det1, det2)
    elif det1 != det2:
        # Numbers, booleans and None: plain equality.
        raise Exception('Different value between "%s" and "%s"' % (det1, det2))


# Shape of the "Unparsed content" message: quoted content, start and end.
_UNPARSED_RE = re.compile(r'Unparsed content "(.*)" at ([0-9]+)-([0-9]+)', re.M|re.S)


def _strip_comment_lines(text):
    """Return ``text`` stripped, with whole '#' comment lines removed."""
    return re.sub(r'^#.*\n', '', text.strip(), flags=re.M).strip()


def _compare_detail_messages(msg1, msg2):
    """Compare two unequal message strings, allowing known tool differences."""
    # python reports if the bad byte is a start or a continuation
    # No way to get that in node.
    if "codec can't decode" in msg1 and "codec can't decode" in msg2:
        return
    # cl.py reports Unparsed content with the preceding comment
    # We want to remove the comment and compare the diff then
    if not (msg1.startswith('Unparsed content') or msg2.startswith('Unparsed content')):
        raise Exception('Different value between "%s" and "%s"' % (msg1, msg2))
    match1 = _UNPARSED_RE.match(msg1)
    match2 = _UNPARSED_RE.match(msg2)
    if match1 is None or match2 is None:
        # Only one side is an "Unparsed content" report (or it is malformed):
        # that is an ordinary value mismatch, not an AttributeError.
        raise Exception('Different value between "%s" and "%s"' % (msg1, msg2))
    content1 = _strip_comment_lines(match1.group(1))
    content2 = _strip_comment_lines(match2.group(1))
    if content1 != content2:
        raise Exception('Different content between "%s" and "%s"' % (content1, content2))
    # Shift each start position by the amount of text that was removed, so
    # that a report including a leading comment lines up with one without.
    removed1 = len(match1.group(1)) - len(content1)
    removed2 = len(match2.group(1)) - len(content2)
    if int(match1.group(2)) + removed1 != int(match2.group(2)) + removed2:
        raise Exception('Different start pos between "%s" and "%s"' %
                        (msg1, msg2))
    if match1.group(3) != match2.group(3):
        raise Exception('Different end pos between "%s" and "%s"' %
                        (msg1, msg2))


def _compare_detail_dicts(dict1, dict2):
    """Compare two dicts key by key, ignoring key order."""
    if set(dict1) != set(dict2):
        if u'missingFile' in dict1:
            # compare-locales does not include strings key for unknown
            # formats. c-l.js includes strings: 0 in that case
            # Patch a shallow copy so the caller's data stays untouched.
            dict1 = dict(dict1)
            dict1[u'strings'] = 0
        if set(dict1) != set(dict2):
            raise Exception('Different keys between "%s" and "%s"' % (dict1, dict2))
    for key in dict1:
        compare_details(dict1[key], dict2[key])
def check(ref, loc):
    """Run both compare tools on ``ref`` and ``loc`` and compare the results.

    :param ref: first directory argument passed to both tools.
    :param loc: second directory argument passed to both tools.
    :returns: True if a difference between the two tools was found and
        reported on stdout; False if the results agree or the run was
        skipped because the Python output contains ``_junk_``.
    :raises subprocess.CalledProcessError: if either tool exits non-zero.
    :raises KeyError: if an output lacks ``details`` or ``summary['null']``.
    """
    py = subprocess.check_output([PY_COMPARE, '--data=json', ref, loc])
    js = subprocess.check_output([JS_COMPARE, '--data=json', ref, loc])
    # check_output returns bytes; a bytes literal works on Python 2 and 3.
    if b'_junk_' in py:
        print('Skipping rev for %s because of junk in result' % ref)
        return False
    py_data = json.loads(py)
    js_data = json.loads(js)
    # compare details in depth
    try:
        compare_details(py_data['details'], js_data['details'])
    except Exception as e:
        # compare_details signals every mismatch with a plain Exception.
        print('bad details')
        print(e)
        return True
    # compare summaries excluding keys, which are python only
    py_sum = py_data['summary']['null']
    js_sum = js_data['summary']['null']
    for key in ('missing', 'missingInFiles', 'obsolete'):
        if py_sum.get(key, 0) != js_sum.get(key, 0):
            # One formatted string: print('bad', key) shows a tuple on Py2.
            print('bad %s' % key)
            return True
    py_total = (py_sum.get('changed', 0) + py_sum.get('unchanged', 0) +
                py_sum.get('keys', 0))
    js_total = js_sum.get('changed', 0) + js_sum.get('unchanged', 0)
    if py_total != js_total:
        print('bad changed+unchanged+keys')
        return True
    return False
def fuzz(max_comps, dir1, dir2):
    """Run check() on randomly chosen changeset pairs of two hg repositories.

    Each round pops one random changeset from each repository's log,
    updates both working directories to them and runs ``check``.  The loop
    ends when ``check`` reports a difference (the two changeset nodes are
    printed), after ``max_comps`` comparisons, or when either log runs out.
    Both repositories are then updated back to the tip revision recorded at
    the start, even if a comparison raised.

    :param max_comps: number of comparisons to run at most; a value below 1
        never matches the counter, so the run is bounded only by the logs.
    :param dir1: path of the first repository.
    :param dir2: path of the second repository.
    """
    client1 = hglib.open(dir1)
    client2 = hglib.open(dir2)
    orig1 = client1.tip().rev
    orig2 = client2.tip().rev
    random.seed()
    log1 = client1.log()
    log2 = client2.log()
    tries_so_far = 0
    try:
        while log1 and log2:
            tries_so_far += 1
            print(tries_so_far)
            cset1 = log1.pop(random.randrange(len(log1)))
            cset2 = log2.pop(random.randrange(len(log2)))
            client1.update(cset1.rev)
            client2.update(cset2.rev)
            if check(dir1, dir2):
                # print('') emits a blank line on both Python 2 and 3.
                print('')
                print("{}: {}".format(dir1, cset1.node))
                print("{}: {}".format(dir2, cset2.node))
                break
            # Test the cap after the comparison so that exactly max_comps
            # comparisons are performed.
            if tries_so_far == max_comps:
                break
    finally:
        # Always put the working directories back, also on errors.
        try:
            client1.update(orig1)
        finally:
            client2.update(orig2)
if __name__ == '__main__':
    # Command line: two repository directories and the comparison cap.
    cli = argparse.ArgumentParser()
    for arg_name, arg_type, arg_help in (
            ("dir1", str, "first dir"),
            ("dir2", str, "second dir"),
            ("max_comps", int, "max number of tries")):
        cli.add_argument(arg_name, type=arg_type, help=arg_help)
    # The parsed namespace stays bound to the module-level name `args`.
    args = cli.parse_args()
    fuzz(max_comps=args.max_comps, dir1=args.dir1, dir2=args.dir2)
####
# en-US at 1535840ba819188ff9c17b1ee29390afc0c131d7 in
# ./apps/homescreen/homescreen.properties has
# >>>>>>> [Bug 923420] Add 'More Apps' to l10n [r=crdlc]
#
# there are entities like "foo:foo2 = foo3" so we have to support ":"
# there are revisions in en-US with _junk_ entries
#
#
# en-US at 5547baf1df446c2eca80cd116eb4311ae96b795c in
# shared/download/download.properties has
#
# /***
# * Foo
# */
#
# cs at 02341dd96958845384b4a65c65c83e9696dbb09a in
# apps/email/email.properties has an error and python c-l treats
# the comment before it as part of the error
#
# sq at aba033cf6ea5e028ae1f95c3fca6421949f0f188 in
# apps/email/email.properties has a line:
# ==========================================================================================
# which c-l turns into entity with key '"' and c-l.js turns into an error
#
# en-US:3107016b3fba
# de:a6c389bb49f2
# error: in cl.py summary is empty, while it should have at least value for
# ['null']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment