zbraniecki/fuzz

## fuzz
#!/usr/bin/env python

import argparse
import random
import hglib
import json
import subprocess
import re

# Executable for the Python implementation; check() runs it with
# '--data=json <ref> <loc>'. A bare command name, so presumably resolved
# through PATH.
PY_COMPARE = 'compare-dirs'
# Executable for the JavaScript (compare-locales.js) implementation; check()
# runs it with the same arguments. NOTE(review): absolute, machine-specific path.
JS_COMPARE = '/src/l10n/slave-ball/vendor-local/compare-locales.js/bin/compare-dirs.js'

# Types treated as "string" details.  ``unicode`` only exists on Python 2,
# so fall back to plain ``str`` where the name is undefined.
try:
    _TEXT_TYPES = (str, unicode)
except NameError:
    _TEXT_TYPES = (str,)

# Shape of an "Unparsed content" message: the quoted content followed by its
# start-end offsets.
_UNPARSED_RE = re.compile(
    r'Unparsed content "(.*)" at ([0-9]+)-([0-9]+)', re.M | re.S)
# One whole comment line, including its trailing newline.
_COMMENT_LINE_RE = re.compile(r'^#.*\n', re.M)


def _compare_strings(det1, det2):
    """Compare two string details, tolerating known reporting differences.

    Raises:
        Exception: if the strings differ in a way that is not one of the
            tolerated differences handled below.
    """
    if det1 == det2:
        return
    # python reports if the bad byte is a start or a continuation.
    # No way to get that in node, so any two decode errors are accepted.
    if "codec can't decode" in det1 and "codec can't decode" in det2:
        return
    # cl.py reports unparsed content together with the preceding comment.
    # Remove the comment lines and compare what is left, then the offsets.
    if det1.startswith('Unparsed content') or det2.startswith('Unparsed content'):
        match1 = _UNPARSED_RE.match(det1)
        match2 = _UNPARSED_RE.match(det2)
        if match1 is None or match2 is None:
            # Only one side is a well-formed "Unparsed content" message.
            raise Exception('Different value between "%s" and "%s"' % (det1, det2))
        content1 = _COMMENT_LINE_RE.sub('', match1.group(1).strip()).strip()
        content2 = _COMMENT_LINE_RE.sub('', match2.group(1).strip()).strip()
        if content1 != content2:
            raise Exception('Different content between "%s" and "%s"' % (content1, content2))

        # Shift each start offset by the amount of text that was removed
        # from that side before comparing them.
        diff1 = len(match1.group(1)) - len(content1)
        diff2 = len(match2.group(1)) - len(content2)
        if int(match1.group(2)) + diff1 != int(match2.group(2)) + diff2:
            raise Exception('Different start pos between "%s" and "%s"' %
                            (det1, det2))
        if match1.group(3) != match2.group(3):
            raise Exception('Different end pos between "%s" and "%s"' %
                            (det1, det2))
        return
    raise Exception('Different value between "%s" and "%s"' % (det1, det2))


def compare_details(det1, det2):
    """Recursively compare two decoded JSON "details" structures.

    ``det1`` is the value from the Python tool and ``det2`` the value from
    the JavaScript tool (see the ``missingFile`` special case below).
    Neither argument is modified.

    Returns:
        None when the structures are considered equivalent.

    Raises:
        Exception: describing the first difference found (type, value,
            length, keys, content or position mismatch).
    """
    if type(det1) != type(det2):
        raise Exception('Type mismatch between "%s" and "%s"' % (det1, det2))
    if isinstance(det1, _TEXT_TYPES):
        _compare_strings(det1, det2)
    elif isinstance(det1, list):
        if len(det1) != len(det2):
            raise Exception('Different length between "%s" and "%s"' % (det1, det2))
        for item1, item2 in zip(det1, det2):
            compare_details(item1, item2)
    elif isinstance(det1, dict):
        # Compare key *sets*: key order carries no meaning.
        if set(det1) != set(det2) and u'missingFile' in det1:
            # compare-locales does not include strings key for unknown
            # formats. c-l.js includes strings: 0 in that case.
            # Work on a copy so the caller's data is left untouched.
            det1 = dict(det1)
            det1.setdefault(u'strings', 0)
        if set(det1) != set(det2):
            raise Exception('Different keys between "%s" and "%s"' % (det1, det2))
        for key in det1:
            compare_details(det1[key], det2[key])
    elif det1 != det2:
        # Any other scalar (int, float, bool, None, ...).
        raise Exception('Different value between "%s" and "%s"' % (det1, det2))

def check(ref, loc):
    """Run both compare-dirs implementations on ``ref``/``loc`` and compare.

    Both tools are invoked with ``--data=json`` and their output is decoded
    as JSON.  The ``details`` sections are compared with compare_details();
    the ``summary['null']`` counters are compared key by key.

    Returns:
        True if a discrepancy was found (a short description is printed),
        False if the outputs agree or the revision was skipped because the
        Python output contains ``_junk_``.

    Raises:
        subprocess.CalledProcessError: if either tool exits non-zero.
    """
    py = subprocess.check_output([PY_COMPARE, '--data=json', ref, loc])
    js = subprocess.check_output([JS_COMPARE, '--data=json', ref, loc])
    # check_output() returns bytes on Python 3 (and str, which is bytes, on
    # Python 2), so search with a bytes literal.
    if b'_junk_' in py:
        print('Skipping rev for %s because of junk in result' % ref)
        return False
    py_data = json.loads(py)
    js_data = json.loads(js)
    # compare details in depth
    try:
        compare_details(py_data['details'], js_data['details'])
    except Exception as e:
        # compare_details() signals every mismatch with a plain Exception.
        print('bad details')
        print(e)
        return True
    # compare summaries excluding keys, which are python only.
    # A missing summary section is treated as "all counters are 0" so that
    # it is reported as an ordinary mismatch instead of crashing the run.
    py_sum = py_data.get('summary', {}).get('null', {})
    js_sum = js_data.get('summary', {}).get('null', {})
    for key in ('missing', 'missingInFiles', 'obsolete'):
        if py_sum.get(key, 0) != js_sum.get(key, 0):
            # Single formatted string: prints the same on Python 2 and 3.
            print('bad %s' % key)
            return True
    py_total = sum(py_sum.get(k, 0) for k in ('changed', 'unchanged', 'keys'))
    js_total = sum(js_sum.get(k, 0) for k in ('changed', 'unchanged'))
    if py_total != js_total:
        print('bad changed+unchanged+keys')
        return True
    return False

def fuzz(max_comps, dir1, dir2):
    """Compare random revision pairs of two Mercurial repositories.

    Each round pops one random changeset from the log of each repository,
    updates both working directories to them and runs check().  The loop
    stops at the first reported discrepancy, after ``max_comps``
    comparisons, or when either log is exhausted.  Both repositories are
    updated back to their tip revision afterwards, even if an error occurs.

    Args:
        max_comps: maximum number of comparisons to run.
        dir1: path of the first repository (passed to check() as ``ref``).
        dir2: path of the second repository (passed to check() as ``loc``).
    """
    client1 = hglib.open(dir1)
    client2 = hglib.open(dir2)

    # Revisions to go back to once the run is over.
    orig1 = client1.tip().rev
    orig2 = client2.tip().rev

    random.seed()

    log1 = client1.log()
    log2 = client2.log()

    tries_so_far = 0
    try:
        while log1 and log2 and tries_so_far < max_comps:
            tries_so_far += 1
            print(tries_so_far)

            # pop() so the same changeset is never picked twice.
            cset1 = log1.pop(random.randrange(len(log1)))
            cset2 = log2.pop(random.randrange(len(log2)))

            client1.update(cset1.rev)
            client2.update(cset2.rev)

            if check(dir1, dir2):
                print('')
                print("{}: {}".format(dir1, cset1.node))
                print("{}: {}".format(dir2, cset2.node))
                break
    finally:
        client1.update(orig1)
        client2.update(orig2)

if __name__ == '__main__':
    # Command-line entry point: two directories and a comparison budget.
    cli = argparse.ArgumentParser()
    for arg_name, arg_type, arg_help in (
        ("dir1", str, "first dir"),
        ("dir2", str, "second dir"),
        ("max_comps", int, "max number of tries"),
    ):
        cli.add_argument(arg_name, type=arg_type, help=arg_help)
    # The parsed namespace is bound at module level under the name ``args``.
    args = cli.parse_args()
    fuzz(max_comps=args.max_comps, dir1=args.dir1, dir2=args.dir2)


####
# en-US at 1535840ba819188ff9c17b1ee29390afc0c131d7 in
# ./apps/homescreen/homescreen.properties has
# >>>>>>> [Bug 923420] Add 'More Apps' to l10n [r=crdlc]
#
# there are entities like "foo:foo2 = foo3" so we have to support ":"
# there are revisions in en-US with _junk_ entries
#
#
# en-US at 5547baf1df446c2eca80cd116eb4311ae96b795c in
# shared/download/download.properties has
#
# /***
#  * Foo
#  */
#
# cs at 02341dd96958845384b4a65c65c83e9696dbb09a in
# apps/email/email.properties has an error and python c-l treats
# the comment before it as part of the error
#
# sq at aba033cf6ea5e028ae1f95c3fca6421949f0f188 in
# apps/email/email.properties has a line:
# ==========================================================================================
# which c-l turns into entity with key '"' and c-l.js turns into an error
#
# en-US:3107016b3fba
# de:a6c389bb49f2
# error: in cl.py the summary is empty, while it should have at least a
# value for ['null']
	#!/usr/bin/env python

	import argparse
	import random
	import hglib
	import json
	import subprocess
	import re

	# Executable for the Python implementation; check() runs it with
	# '--data=json <ref> <loc>'. A bare command name, so presumably resolved
	# through PATH.
	PY_COMPARE = 'compare-dirs'
	# Executable for the JavaScript (compare-locales.js) implementation; check()
	# runs it with the same arguments. NOTE(review): absolute, machine-specific path.
	JS_COMPARE = '/src/l10n/slave-ball/vendor-local/compare-locales.js/bin/compare-dirs.js'

	# Types treated as "string" details.  ``unicode`` only exists on Python 2,
	# so fall back to plain ``str`` where the name is undefined.
	try:
	    _TEXT_TYPES = (str, unicode)
	except NameError:
	    _TEXT_TYPES = (str,)

	# Shape of an "Unparsed content" message: the quoted content followed by its
	# start-end offsets.
	_UNPARSED_RE = re.compile(
	    r'Unparsed content "(.*)" at ([0-9]+)-([0-9]+)', re.M | re.S)
	# One whole comment line, including its trailing newline.
	_COMMENT_LINE_RE = re.compile(r'^#.*\n', re.M)


	def _compare_strings(det1, det2):
	    """Compare two string details, tolerating known reporting differences.

	    Raises:
	        Exception: if the strings differ in a way that is not one of the
	            tolerated differences handled below.
	    """
	    if det1 == det2:
	        return
	    # python reports if the bad byte is a start or a continuation.
	    # No way to get that in node, so any two decode errors are accepted.
	    if "codec can't decode" in det1 and "codec can't decode" in det2:
	        return
	    # cl.py reports unparsed content together with the preceding comment.
	    # Remove the comment lines and compare what is left, then the offsets.
	    if det1.startswith('Unparsed content') or det2.startswith('Unparsed content'):
	        match1 = _UNPARSED_RE.match(det1)
	        match2 = _UNPARSED_RE.match(det2)
	        if match1 is None or match2 is None:
	            # Only one side is a well-formed "Unparsed content" message.
	            raise Exception('Different value between "%s" and "%s"' % (det1, det2))
	        content1 = _COMMENT_LINE_RE.sub('', match1.group(1).strip()).strip()
	        content2 = _COMMENT_LINE_RE.sub('', match2.group(1).strip()).strip()
	        if content1 != content2:
	            raise Exception('Different content between "%s" and "%s"' % (content1, content2))

	        # Shift each start offset by the amount of text that was removed
	        # from that side before comparing them.
	        diff1 = len(match1.group(1)) - len(content1)
	        diff2 = len(match2.group(1)) - len(content2)
	        if int(match1.group(2)) + diff1 != int(match2.group(2)) + diff2:
	            raise Exception('Different start pos between "%s" and "%s"' %
	                            (det1, det2))
	        if match1.group(3) != match2.group(3):
	            raise Exception('Different end pos between "%s" and "%s"' %
	                            (det1, det2))
	        return
	    raise Exception('Different value between "%s" and "%s"' % (det1, det2))


	def compare_details(det1, det2):
	    """Recursively compare two decoded JSON "details" structures.

	    ``det1`` is the value from the Python tool and ``det2`` the value from
	    the JavaScript tool (see the ``missingFile`` special case below).
	    Neither argument is modified.

	    Returns:
	        None when the structures are considered equivalent.

	    Raises:
	        Exception: describing the first difference found (type, value,
	            length, keys, content or position mismatch).
	    """
	    if type(det1) != type(det2):
	        raise Exception('Type mismatch between "%s" and "%s"' % (det1, det2))
	    if isinstance(det1, _TEXT_TYPES):
	        _compare_strings(det1, det2)
	    elif isinstance(det1, list):
	        if len(det1) != len(det2):
	            raise Exception('Different length between "%s" and "%s"' % (det1, det2))
	        for item1, item2 in zip(det1, det2):
	            compare_details(item1, item2)
	    elif isinstance(det1, dict):
	        # Compare key *sets*: key order carries no meaning.
	        if set(det1) != set(det2) and u'missingFile' in det1:
	            # compare-locales does not include strings key for unknown
	            # formats. c-l.js includes strings: 0 in that case.
	            # Work on a copy so the caller's data is left untouched.
	            det1 = dict(det1)
	            det1.setdefault(u'strings', 0)
	        if set(det1) != set(det2):
	            raise Exception('Different keys between "%s" and "%s"' % (det1, det2))
	        for key in det1:
	            compare_details(det1[key], det2[key])
	    elif det1 != det2:
	        # Any other scalar (int, float, bool, None, ...).
	        raise Exception('Different value between "%s" and "%s"' % (det1, det2))

	def check(ref, loc):
	    """Run both compare-dirs implementations on ``ref``/``loc`` and compare.

	    Both tools are invoked with ``--data=json`` and their output is decoded
	    as JSON.  The ``details`` sections are compared with compare_details();
	    the ``summary['null']`` counters are compared key by key.

	    Returns:
	        True if a discrepancy was found (a short description is printed),
	        False if the outputs agree or the revision was skipped because the
	        Python output contains ``_junk_``.

	    Raises:
	        subprocess.CalledProcessError: if either tool exits non-zero.
	    """
	    py = subprocess.check_output([PY_COMPARE, '--data=json', ref, loc])
	    js = subprocess.check_output([JS_COMPARE, '--data=json', ref, loc])
	    # check_output() returns bytes on Python 3 (and str, which is bytes, on
	    # Python 2), so search with a bytes literal.
	    if b'_junk_' in py:
	        print('Skipping rev for %s because of junk in result' % ref)
	        return False
	    py_data = json.loads(py)
	    js_data = json.loads(js)
	    # compare details in depth
	    try:
	        compare_details(py_data['details'], js_data['details'])
	    except Exception as e:
	        # compare_details() signals every mismatch with a plain Exception.
	        print('bad details')
	        print(e)
	        return True
	    # compare summaries excluding keys, which are python only.
	    # A missing summary section is treated as "all counters are 0" so that
	    # it is reported as an ordinary mismatch instead of crashing the run.
	    py_sum = py_data.get('summary', {}).get('null', {})
	    js_sum = js_data.get('summary', {}).get('null', {})
	    for key in ('missing', 'missingInFiles', 'obsolete'):
	        if py_sum.get(key, 0) != js_sum.get(key, 0):
	            # Single formatted string: prints the same on Python 2 and 3.
	            print('bad %s' % key)
	            return True
	    py_total = sum(py_sum.get(k, 0) for k in ('changed', 'unchanged', 'keys'))
	    js_total = sum(js_sum.get(k, 0) for k in ('changed', 'unchanged'))
	    if py_total != js_total:
	        print('bad changed+unchanged+keys')
	        return True
	    return False

	def fuzz(max_comps, dir1, dir2):
	    """Compare random revision pairs of two Mercurial repositories.

	    Each round pops one random changeset from the log of each repository,
	    updates both working directories to them and runs check().  The loop
	    stops at the first reported discrepancy, after ``max_comps``
	    comparisons, or when either log is exhausted.  Both repositories are
	    updated back to their tip revision afterwards, even if an error occurs.

	    Args:
	        max_comps: maximum number of comparisons to run.
	        dir1: path of the first repository (passed to check() as ``ref``).
	        dir2: path of the second repository (passed to check() as ``loc``).
	    """
	    client1 = hglib.open(dir1)
	    client2 = hglib.open(dir2)

	    # Revisions to go back to once the run is over.
	    orig1 = client1.tip().rev
	    orig2 = client2.tip().rev

	    random.seed()

	    log1 = client1.log()
	    log2 = client2.log()

	    tries_so_far = 0
	    try:
	        while log1 and log2 and tries_so_far < max_comps:
	            tries_so_far += 1
	            print(tries_so_far)

	            # pop() so the same changeset is never picked twice.
	            cset1 = log1.pop(random.randrange(len(log1)))
	            cset2 = log2.pop(random.randrange(len(log2)))

	            client1.update(cset1.rev)
	            client2.update(cset2.rev)

	            if check(dir1, dir2):
	                print('')
	                print("{}: {}".format(dir1, cset1.node))
	                print("{}: {}".format(dir2, cset2.node))
	                break
	    finally:
	        client1.update(orig1)
	        client2.update(orig2)

	if __name__ == '__main__':
	    # Command-line entry point: two directories and a comparison budget.
	    parser = argparse.ArgumentParser()
	    parser.add_argument("dir1", type=str, help="first dir")
	    parser.add_argument("dir2", type=str, help="second dir")
	    parser.add_argument("max_comps", type=int, help="max number of tries")
	    # The parsed namespace is bound at module level under the name ``args``.
	    args = parser.parse_args()
	    fuzz(**vars(args))


	####
	# en-US at 1535840ba819188ff9c17b1ee29390afc0c131d7 in
	# ./apps/homescreen/homescreen.properties has
	# >>>>>>> [Bug 923420] Add 'More Apps' to l10n [r=crdlc]
	#
	# there are entities like "foo:foo2 = foo3" so we have to support ":"
	# there are revisions in en-US with _junk_ entries
	#
	#
	# en-US at 5547baf1df446c2eca80cd116eb4311ae96b795c in
	# shared/download/download.properties has
	#
	# /***
	# * Foo
	# */
	#
	# cs at 02341dd96958845384b4a65c65c83e9696dbb09a in
	# apps/email/email.properties has an error and python c-l treats
	# the comment before it as part of the error
	#
	# sq at aba033cf6ea5e028ae1f95c3fca6421949f0f188 in
	# apps/email/email.properties has a line:
	# ==========================================================================================
	# which c-l turns into entity with key '"' and c-l.js turns into an error
	#
	# en-US:3107016b3fba
	# de:a6c389bb49f2
	# error: in cl.py the summary is empty, while it should have at least a
	# value for ['null']