Created
December 21, 2011 00:05
-
-
Save Pike/1503889 to your computer and use it in GitHub Desktop.
Mercurial extension to review the l10n impact between two revisions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2011 Mozilla Foundation
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from mercurial import hg, commands | |
from mercurial.match import match as _match | |
from mercurial.node import nullid | |
from mercurial.copies import copies as _copies | |
from collections import defaultdict | |
import re | |
def _split(f): | |
t = f.rsplit("/", 1) | |
if len(t) == 1: | |
t.insert(0, "") | |
return t | |
# copied from mercurial.copies, as it's internal | |
def _dirname(f): | |
s = f.rfind("/") | |
if s == -1: | |
return "" | |
return f[:s] | |
# copied from mercurial.copies, as it's internal
def _dirs(files):
    """Return the set of all ancestor directories of the given file paths.

    The root directory is represented by the empty string and ends up in
    the result as soon as ``files`` yields at least one path.
    """
    collected = set()
    for path in files:
        ancestor = _dirname(path)
        # Climb towards the root; stop at the first directory already seen,
        # because all of its ancestors were recorded at that time.
        while True:
            if ancestor in collected:
                break
            collected.add(ancestor)
            ancestor = _dirname(ancestor)
    return collected
def _universal_newlines(content): | |
"CompareLocales reads files with universal newlines, fake that" | |
return content.replace('\r\n', '\n').replace('\r', '\n') | |
def _topmost_unknown_dir(path, known_dirs):
    """Return the highest ancestor directory of ``path`` not in ``known_dirs``.

    Returns None when the directory holding ``path`` is itself in
    ``known_dirs``. The upward walk never climbs above a directory whose
    name ends in ``/locales/en-US``.
    """
    current = _dirname(path)
    if current in known_dirs:
        return None
    parent = _dirname(current)
    while parent not in known_dirs and not current.endswith('/locales/en-US'):
        current = parent
        parent = _dirname(current)
    return current


def _paired_dir_roots(dest, source, known_dirs):
    """Climb the directories of ``dest`` and ``source`` in lockstep.

    Both directories move one level up for as long as their last path
    components are equal, neither parent is in ``known_dirs``, and neither
    directory ends in ``/locales/en-US``. Returns the resulting
    ``(dest_dir, source_dir)`` pair.
    """
    dest_dir, source_dir = _dirname(dest), _dirname(source)
    dest_parent, dest_leaf = _split(dest_dir)
    source_parent, source_leaf = _split(source_dir)
    while (dest_leaf == source_leaf and
           dest_parent not in known_dirs and
           not dest_dir.endswith('/locales/en-US') and
           source_parent not in known_dirs and
           not source_dir.endswith('/locales/en-US')):
        dest_dir, source_dir = dest_parent, source_parent
        dest_parent, dest_leaf = _split(dest_dir)
        source_parent, source_leaf = _split(source_dir)
    return dest_dir, source_dir


# Developer notes for l10n_impact (kept out of the docstring, which
# Mercurial shows to users as the command's help text):
#   ui     -- Mercurial ui object; all report lines are written through it.
#   repo   -- repository the two revisions are looked up in.
#   from_  -- revision identifier of the baseline.
#   to_    -- revision identifier compared against the baseline.
#   opts   -- accepted for the command calling convention, not used here.
# Only paths matching '/locales/en-US/' are considered.
def l10n_impact(ui, repo, from_, to_='default', **opts):
    """Compare the l10n impact of the revision history

    Find file moves and copies for now.
    """
    from Mozilla.Parser import getParser
    l10n = re.compile('/locales/en-US/')
    ctx1 = repo.changectx(from_)
    ctx2 = repo.changectx(to_)

    # Keep only copy records where either end lives below locales/en-US.
    copies = _copies(repo, ctx1, ctx2, repo[nullid], checkdirs=False)[0]
    copies = dict((dest, source) for dest, source in copies.items()
                  if l10n.search(dest) or l10n.search(source))

    # The original passed the bound method ``repo.getcwd`` here instead of
    # calling it. NOTE(review): with a lone 'relre:' pattern the cwd looks
    # unused by the matcher -- confirm against the targeted Mercurial version.
    match = _match(repo.root, repo.getcwd(), ['relre:%s' % l10n.pattern])
    changed, added, removed = repo.status(ctx1, ctx2, match=match)[:3]
    # Work on a private copy, entries are removed below.
    removed = list(removed)

    # A copy whose source disappeared is a move, anything else a real copy.
    moves = {}
    realcopies = {}
    for dest, source in copies.items():
        if source in removed:
            moves[dest] = source
            removed.remove(source)
        else:
            realcopies[dest] = source
    added = [path for path in added if path not in copies]

    # Directories holding l10n files before (d1) and after (d2).
    # The original computed d2 from ctx1 as well, which made every removed
    # or moved path look like it still had its directory in the target
    # revision, so removed/moved directories were never reported.
    d1 = _dirs(f for f in ctx1.manifest() if l10n.search(f))
    d2 = _dirs(f for f in ctx2.manifest() if l10n.search(f))

    # find removed directories
    rmdirs = defaultdict(list)
    for path in removed:
        top = _topmost_unknown_dir(path, d2)
        if top is None:
            # directory still exists in the target revision
            continue
        rmdirs[top].append(path[len(top) + 1:])

    # find moved directories
    mvdirs = defaultdict(list)
    mvmap = {}
    for dest, source in moves.items():
        if _dirname(dest) in d2 or _dirname(source) in d2:
            # either not a new directory, or not an empty one
            continue
        dest_dir, source_dir = _paired_dir_roots(dest, source, d2)
        mvdirs[dest_dir].append(dest[len(dest_dir) + 1:])
        mvmap[dest_dir] = source_dir

    # find copied directories
    cpdirs = defaultdict(list)
    cpmap = {}
    for dest, source in realcopies.items():
        if _dirname(dest) in d2:
            # not a new directory
            continue
        dest_dir, source_dir = _paired_dir_roots(dest, source, d2)
        cpdirs[dest_dir].append(dest[len(dest_dir) + 1:])
        cpmap[dest_dir] = source_dir

    # find new directories
    newdirs = defaultdict(list)
    for path in added:
        top = _topmost_unknown_dir(path, d1)
        if top is None:
            # directory already existed in the baseline revision
            continue
        newdirs[top].append(path[len(top) + 1:])

    # Report through ui.write rather than print so the output goes through
    # Mercurial's own output handling.
    for d, files in newdirs.items():
        ui.write(' added %s (%s)\n' % (d, ', '.join(files)))
    for d, files in rmdirs.items():
        ui.write(' removed %s (%s)\n' % (d, ', '.join(files)))
    for d, files in cpdirs.items():
        ui.write(' copied %s to %s (%s)\n'
                 % (cpmap[d], d, ', '.join(files)))
    for d, files in mvdirs.items():
        ui.write(' moved %s to %s (%s)\n'
                 % (mvmap[d], d, ', '.join(files)))

    # Compare entity values of every changed, copied or moved file.
    for dest in sorted(list(changed) + list(copies)):
        source = copies.get(dest, dest)
        try:
            p = getParser(dest)
        except UserWarning:
            if source == dest:
                ui.write(' not checking modified %s\n' % dest)
            else:
                ui.write(' not checking %s, was %s\n' % (dest, source))
            continue
        if p is None:
            continue

        data = _universal_newlines(ctx1.filectx(source).data())
        try:
            p.readContents(data)
            a_entities, a_map = p.parse()
        except Exception:
            # Best effort: report and move on, but let KeyboardInterrupt
            # and SystemExit propagate (the original used a bare except).
            ui.write(' failed to read source %s\n' % source)
            continue

        data = _universal_newlines(ctx2.filectx(dest).data())
        try:
            p.readContents(data)
            b_entities, b_map = p.parse()
        except Exception:
            ui.write(' failed to read dest %s\n' % dest)
            continue

        # Only entities present on both sides are compared; added and
        # removed entities are not reported here.
        for k, i in b_map.items():
            if k not in a_map:
                continue
            a_v = a_entities[a_map[k]].val
            b_v = b_entities[i].val
            if a_v != b_v:
                ui.write(' differing entity %s in %s\n' % (k, dest))
                ui.write('  %s\n' % a_v.encode('utf-8'))
                ui.write('  %s\n' % b_v.encode('utf-8'))
# Command table read by Mercurial when the extension is loaded: maps the
# command name to (function, option list, synopsis).
cmdtable = {
    "l10n-impact": (
        l10n_impact,
        [],
        'hg l10n-impact FROM [TO]',
    ),
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment