Skip to content

Instantly share code, notes, and snippets.

@jezdez
Created April 14, 2011 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jezdez/05a28232e63bc30277d5 to your computer and use it in GitHub Desktop.
The diff between django.utils.translation.templatize and babeldjango.extract.extract_django
--- trans_real.py
+++ (clipboard)
@@ -1,50 +1,33 @@
-def templatize(src, origin=None):
+def extract_django(fileobj, keywords, comment_tags, options):
+ """Extract messages from Django template files.
+
+ :param fileobj: the file-like object the messages should be extracted from
+ :param keywords: a list of keywords (i.e. function names) that should
+ be recognized as translation functions
+ :param comment_tags: a list of translator tags to search for and
+ include in the results
+ :param options: a dictionary of additional options (optional)
+ :return: an iterator over ``(lineno, funcname, message, comments)``
+ tuples
+ :rtype: ``iterator``
"""
- Turns a Django template into something that is understood by xgettext. It
- does so by translating the Django translation tags into standard gettext
- function invocations.
- """
- from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
- TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
- out = StringIO()
intrans = False
inplural = False
singular = []
plural = []
- incomment = False
- comment = []
- for t in Lexer(src, origin).tokenize():
- if incomment:
- if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
- content = u''.join(comment)
- translators_comment_start = None
- for lineno, line in enumerate(content.splitlines(True)):
- if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
- translators_comment_start = lineno
- for lineno, line in enumerate(content.splitlines(True)):
- if translators_comment_start is not None and lineno >= translators_comment_start:
- out.write(u' # %s' % line)
- else:
- out.write(u' #\n')
- incomment = False
- comment = []
- else:
- comment.append(t.contents)
- elif intrans:
+ lineno = 1
+ for t in Lexer(fileobj.read(), None).tokenize():
+ lineno += t.contents.count('\n')
+ if intrans:
if t.token_type == TOKEN_BLOCK:
endbmatch = endblock_re.match(t.contents)
pluralmatch = plural_re.match(t.contents)
if endbmatch:
if inplural:
- out.write(' ngettext(%r,%r,count) ' % (''.join(singular), ''.join(plural)))
- for part in singular:
- out.write(blankout(part, 'S'))
- for part in plural:
- out.write(blankout(part, 'P'))
+ yield lineno, 'ngettext', (unicode(''.join(singular)),
+ unicode(''.join(plural))), []
else:
- out.write(' gettext(%r) ' % ''.join(singular))
- for part in singular:
- out.write(blankout(part, 'S'))
+ yield lineno, None, unicode(''.join(singular)), []
intrans = False
inplural = False
singular = []
@@ -52,21 +35,18 @@
elif pluralmatch:
inplural = True
else:
- filemsg = ''
- if origin:
- filemsg = 'file %s, ' % origin
- raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
+ raise SyntaxError('Translation blocks must not include '
+ 'other block tags: %s' % t.contents)
elif t.token_type == TOKEN_VAR:
if inplural:
plural.append('%%(%s)s' % t.contents)
else:
singular.append('%%(%s)s' % t.contents)
elif t.token_type == TOKEN_TEXT:
- contents = t.contents.replace('%', '%%')
if inplural:
- plural.append(contents)
+ plural.append(t.contents)
else:
- singular.append(contents)
+ singular.append(t.contents)
else:
if t.token_type == TOKEN_BLOCK:
imatch = inline_re.match(t.contents)
@@ -74,35 +54,35 @@
cmatches = constant_re.findall(t.contents)
if imatch:
g = imatch.group(1)
- if g[0] == '"': g = g.strip('"')
- elif g[0] == "'": g = g.strip("'")
- out.write(' gettext(%r) ' % g)
+ if g[0] == '"':
+ g = g.strip('"')
+ elif g[0] == "'":
+ g = g.strip("'")
+ yield lineno, None, unicode(g), []
elif bmatch:
for fmatch in constant_re.findall(t.contents):
- out.write(' _(%s) ' % fmatch)
+ yield lineno, None, unicode(fmatch), []
intrans = True
inplural = False
singular = []
plural = []
elif cmatches:
for cmatch in cmatches:
- out.write(' _(%s) ' % cmatch)
- elif t.contents == 'comment':
- incomment = True
- else:
- out.write(blankout(t.contents, 'B'))
+ yield lineno, None, unicode(cmatch), []
elif t.token_type == TOKEN_VAR:
parts = t.contents.split('|')
cmatch = constant_re.match(parts[0])
if cmatch:
- out.write(' _(%s) ' % cmatch.group(1))
+ yield lineno, None, unicode(cmatch.group(1)), []
for p in parts[1:]:
if p.find(':_(') >= 0:
- out.write(' %s ' % p.split(':',1)[1])
- else:
- out.write(blankout(p, 'F'))
- elif t.token_type == TOKEN_COMMENT:
- out.write(' # %s' % t.contents)
- else:
- out.write(blankout(t.contents, 'X'))
- return out.getvalue()
+ p1 = p.split(':',1)[1]
+ if p1[0] == '_':
+ p1 = p1[1:]
+ if p1[0] == '(':
+ p1 = p1.strip('()')
+ if p1[0] == "'":
+ p1 = p1.strip("'")
+ elif p1[0] == '"':
+ p1 = p1.strip('"')
+ yield lineno, None, unicode(p1), []
\ No newline at end of file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment