Pike/l10n_impact.py

## l10n_impact.py
# Copyright 2011 Mozilla Foundation
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from mercurial import hg, commands
from mercurial.match import match as _match
from mercurial.node import nullid
from mercurial.copies import copies as _copies

from collections import defaultdict
import re


def _split(f):
    t = f.rsplit("/", 1)
    if len(t) == 1:
        t.insert(0, "")
    return t

# copied from mercurial.copies, as it's internal
def _dirname(f):
    s = f.rfind("/")
    if s == -1:
        return ""
    return f[:s]

# copied from mercurial.copies, as it's internal
def _dirs(files):
    """Return the set of all directories that contain any path in *files*.

    Every ancestor directory is included, up to and including the
    top-level directory, which is represented by the empty string.
    """
    seen = set()
    for path in files:
        parent = _dirname(path)
        while True:
            if parent in seen:
                # this directory and all of its ancestors are recorded
                break
            seen.add(parent)
            parent = _dirname(parent)
    return seen


def _universal_newlines(content):
    "CompareLocales reads files with universal newlines, fake that"
    return content.replace('\r\n', '\n').replace('\r', '\n')


def l10n_impact(ui, repo, from_, to_='default', **opts):
    """Compare the l10n impact of the revision history

    Find file moves and copies for now.
    """
    from Mozilla.Parser import getParser
    l10n = re.compile('/locales/en-US/')
    ctx1 = repo.changectx(from_)
    ctx2 = repo.changectx(to_)
    copies = _copies(repo, ctx1, ctx2, repo[nullid], checkdirs=False)[0]
    copies = dict(filter(lambda t: l10n.search(t[0]) or l10n.search(t[1]),
                         copies.iteritems()))
    match = _match(repo.root, repo.getcwd, ['relre:%s' % l10n.pattern])
    changed, added, removed = repo.status(ctx1, ctx2, match=match)[:3]
    moves = {}
    realcopies = {}
    for dest, source in copies.iteritems():
        if source in removed:
            moves[dest] = source
            removed.remove(source)
        else:
            realcopies[dest] = source
    added = filter(lambda p: p not in copies, added)

    m1 = list(f for f in ctx1.manifest() if l10n.search(f))
    d1 = _dirs(m1)
    m2 = list(f for f in ctx1.manifest() if l10n.search(f))
    d2 = _dirs(m2)
    dirty = set()

    # find removed directories
    rmdirs = defaultdict(list)
    for path in removed:
        #print path
        _d = _dirname(path)
        if _d in d2:
            continue
        _p = _dirname(_d)
        while _p not in d2 and not _d.endswith('/locales/en-US'):
            _d = _p
            _p = _dirname(_d)
        rmdirs[_d].append(path[len(_d) + 1 :])

    # find moved directories
    mvdirs = defaultdict(list)
    mvmap = {}
    for dest, source in moves.iteritems():
        #print dest
        _d, _s = _dirname(dest), _dirname(source)
        if _d in d2 or _s in d2:
            # either not a new directory, or not an empty one
            continue
        _pd, _ld = _split(_d)
        _sd, _ls = _split(_s)
        while (_ld == _ls and
               _pd not in d2 and not _d.endswith('/locales/en-US') and
               _sd not in d2 and not _s.endswith('/locales/en-US')):
            _d, _s = _pd, _sd
            _pd, _ld = _split(_d)
            _sd, _ls = _split(_s)
        mvdirs[_d].append(dest[len(_d) + 1 :])
        mvmap[_d] = _s

    # find copied directories
    cpdirs = defaultdict(list)
    cpmap = {}
    for dest, source in realcopies.iteritems():
        #print dest
        _d, _s = _dirname(dest), _dirname(source)
        if _d in d2:
            # not a new directory
            continue
        _pd, _ld = _split(_d)
        _sd, _ls = _split(_s)
        while (_ld == _ls and
               _pd not in d2 and not _d.endswith('/locales/en-US') and
               _sd not in d2 and not _s.endswith('/locales/en-US')):
            _d, _s = _pd, _sd
            _pd, _ld = _split(_d)
            _sd, _ls = _split(_s)
        cpdirs[_d].append(dest[len(_d) + 1 :])
        cpmap[_d] = _s

    # find new directories
    newdirs = defaultdict(list)
    for path in added:
        #print path
        _d = _dirname(path)
        if _d in d1:
            continue
        _p = _dirname(_d)
        while _p not in d1 and not _d.endswith('/locales/en-US'):
            _d = _p
            _p = _dirname(_d)
        newdirs[_d].append(path[len(_d) + 1 :])

#    for dest, source in realcopies.iteritems():
#        print 'cp', source, dest
#    for dest, source in moves.iteritems():
#        print 'mv', source, dest
    for d, files in newdirs.iteritems():
        print ' added', d, '(%s)' % ', '.join(files)
    for d, files in rmdirs.iteritems():
        print ' removed', d, '(%s)' % ', '.join(files)
    for d, files in cpdirs.iteritems():
        print ' copied', cpmap[d], 'to', d, '(%s)' % ', '.join(files)
    for d, files in mvdirs.iteritems():
        print ' moved', mvmap[d], 'to', d, '(%s)' % ', '.join(files)

    for dest in sorted(changed + copies.keys()):
        source =  copies.get(dest, dest)
        p = None
        try:
            p = getParser(dest)
        except UserWarning:
            if source == dest:
                print ' not checking modified', dest
            else:
                print ' not checking %s, was %s' % (dest, source)
        if p is not None:
            data = ctx1.filectx(source).data()
            data = _universal_newlines(data)
            try:
                p.readContents(data)
                a_entities, a_map = p.parse()
            except:
                print ' failed to read source', source
                continue
            data = ctx2.filectx(dest).data()
            data = _universal_newlines(data)
            try:
                p.readContents(data)
                b_entities, b_map = p.parse()
            except:
                print ' failed to read dest', dest
                continue
            for k, i in b_map.iteritems():
                if k not in a_map:
                    continue
                a_v = a_entities[a_map[k]].val
                b_v = b_entities[i].val
                if a_v != b_v:
                    print ' differing entity', k, 'in', dest
                    print ' ', a_v.encode('utf-8')
                    print ' ', b_v.encode('utf-8')


# Command table picked up by Mercurial: registers "l10n-impact" with
# its implementation, an empty option list and its usage string.
cmdtable = {
    "l10n-impact": (
        l10n_impact,
        [],
        'hg l10n-impact FROM [TO]',
    ),
}
	# Copyright 2011 Mozilla Foundation
	#
	# This software may be used and distributed according to the terms of the
	# GNU General Public License version 2 or any later version.

	from mercurial import hg, commands
	from mercurial.match import match as _match
	from mercurial.node import nullid
	from mercurial.copies import copies as _copies

	from collections import defaultdict
	import re


	def _split(f):
	t = f.rsplit("/", 1)
	if len(t) == 1:
	t.insert(0, "")
	return t

	# copied from mercurial.copies, as it's internal
	def _dirname(f):
	s = f.rfind("/")
	if s == -1:
	return ""
	return f[:s]

	# copied from mercurial.copies, as it's internal
	def _dirs(files):
	    """Return the set of all directories that contain any path in *files*.

	    Every ancestor directory is included, up to and including the
	    top-level directory, which is represented by the empty string.
	    """
	    d = set()
	    for f in files:
	        f = _dirname(f)
	        # stop as soon as a directory is known: its ancestors are too
	        while f not in d:
	            d.add(f)
	            f = _dirname(f)
	    return d


	def _universal_newlines(content):
	"CompareLocales reads files with universal newlines, fake that"
	return content.replace('\r\n', '\n').replace('\r', '\n')


	def l10n_impact(ui, repo, from_, to_='default', **opts):
	"""Compare the l10n impact of the revision history

	Find file moves and copies for now.
	"""
	from Mozilla.Parser import getParser
	l10n = re.compile('/locales/en-US/')
	ctx1 = repo.changectx(from_)
	ctx2 = repo.changectx(to_)
	copies = _copies(repo, ctx1, ctx2, repo[nullid], checkdirs=False)[0]
	copies = dict(filter(lambda t: l10n.search(t[0]) or l10n.search(t[1]),
	copies.iteritems()))
	match = _match(repo.root, repo.getcwd, ['relre:%s' % l10n.pattern])
	changed, added, removed = repo.status(ctx1, ctx2, match=match)[:3]
	moves = {}
	realcopies = {}
	for dest, source in copies.iteritems():
	if source in removed:
	moves[dest] = source
	removed.remove(source)
	else:
	realcopies[dest] = source
	added = filter(lambda p: p not in copies, added)

	m1 = list(f for f in ctx1.manifest() if l10n.search(f))
	d1 = _dirs(m1)
	m2 = list(f for f in ctx1.manifest() if l10n.search(f))
	d2 = _dirs(m2)
	dirty = set()

	# find removed directories
	rmdirs = defaultdict(list)
	for path in removed:
	#print path
	_d = _dirname(path)
	if _d in d2:
	continue
	_p = _dirname(_d)
	while _p not in d2 and not _d.endswith('/locales/en-US'):
	_d = _p
	_p = _dirname(_d)
	rmdirs[_d].append(path[len(_d) + 1 :])

	# find moved directories
	mvdirs = defaultdict(list)
	mvmap = {}
	for dest, source in moves.iteritems():
	#print dest
	_d, _s = _dirname(dest), _dirname(source)
	if _d in d2 or _s in d2:
	# either not a new directory, or not an empty one
	continue
	_pd, _ld = _split(_d)
	_sd, _ls = _split(_s)
	while (_ld == _ls and
	_pd not in d2 and not _d.endswith('/locales/en-US') and
	_sd not in d2 and not _s.endswith('/locales/en-US')):
	_d, _s = _pd, _sd
	_pd, _ld = _split(_d)
	_sd, _ls = _split(_s)
	mvdirs[_d].append(dest[len(_d) + 1 :])
	mvmap[_d] = _s

	# find copied directories
	cpdirs = defaultdict(list)
	cpmap = {}
	for dest, source in realcopies.iteritems():
	#print dest
	_d, _s = _dirname(dest), _dirname(source)
	if _d in d2:
	# not a new directory
	continue
	_pd, _ld = _split(_d)
	_sd, _ls = _split(_s)
	while (_ld == _ls and
	_pd not in d2 and not _d.endswith('/locales/en-US') and
	_sd not in d2 and not _s.endswith('/locales/en-US')):
	_d, _s = _pd, _sd
	_pd, _ld = _split(_d)
	_sd, _ls = _split(_s)
	cpdirs[_d].append(dest[len(_d) + 1 :])
	cpmap[_d] = _s

	# find new directories
	newdirs = defaultdict(list)
	for path in added:
	#print path
	_d = _dirname(path)
	if _d in d1:
	continue
	_p = _dirname(_d)
	while _p not in d1 and not _d.endswith('/locales/en-US'):
	_d = _p
	_p = _dirname(_d)
	newdirs[_d].append(path[len(_d) + 1 :])

	# for dest, source in realcopies.iteritems():
	# print 'cp', source, dest
	# for dest, source in moves.iteritems():
	# print 'mv', source, dest
	for d, files in newdirs.iteritems():
	print ' added', d, '(%s)' % ', '.join(files)
	for d, files in rmdirs.iteritems():
	print ' removed', d, '(%s)' % ', '.join(files)
	for d, files in cpdirs.iteritems():
	print ' copied', cpmap[d], 'to', d, '(%s)' % ', '.join(files)
	for d, files in mvdirs.iteritems():
	print ' moved', mvmap[d], 'to', d, '(%s)' % ', '.join(files)

	for dest in sorted(changed + copies.keys()):
	source = copies.get(dest, dest)
	p = None
	try:
	p = getParser(dest)
	except UserWarning:
	if source == dest:
	print ' not checking modified', dest
	else:
	print ' not checking %s, was %s' % (dest, source)
	if p is not None:
	data = ctx1.filectx(source).data()
	data = _universal_newlines(data)
	try:
	p.readContents(data)
	a_entities, a_map = p.parse()
	except:
	print ' failed to read source', source
	continue
	data = ctx2.filectx(dest).data()
	data = _universal_newlines(data)
	try:
	p.readContents(data)
	b_entities, b_map = p.parse()
	except:
	print ' failed to read dest', dest
	continue
	for k, i in b_map.iteritems():
	if k not in a_map:
	continue
	a_v = a_entities[a_map[k]].val
	b_v = b_entities[i].val
	if a_v != b_v:
	print ' differing entity', k, 'in', dest
	print ' ', a_v.encode('utf-8')
	print ' ', b_v.encode('utf-8')


	# Command table picked up by Mercurial: registers "l10n-impact" with
	# its implementation, an empty option list and its usage string.
	cmdtable = {
	    "l10n-impact": (
	        l10n_impact,
	        [],
	        'hg l10n-impact FROM [TO]',
	    ),
	}