Skip to content

Instantly share code, notes, and snippets.

@jezdez
Created April 14, 2011 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jezdez/05a28232e63bc30277d5 to your computer and use it in GitHub Desktop.
The diff between django.utils.translation.templatize and babeldjango.extract.extract_django
--- trans_real.py
+++ (clipboard)
@@ -1,50 +1,33 @@
-def templatize(src, origin=None):
+def extract_django(fileobj, keywords, comment_tags, options):
+ """Extract messages from Django template files.
+
+ :param fileobj: the file-like object the messages should be extracted from
+ :param keywords: a list of keywords (i.e. function names) that should
+ be recognized as translation functions
+ :param comment_tags: a list of translator tags to search for and
+ include in the results
+ :param options: a dictionary of additional options (optional)
+ :return: an iterator over ``(lineno, funcname, message, comments)``
+ tuples
+ :rtype: ``iterator``
"""
- Turns a Django template into something that is understood by xgettext. It
- does so by translating the Django translation tags into standard gettext
- function invocations.
- """
- from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
- TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
- out = StringIO()
intrans = False
inplural = False
singular = []
plural = []
- incomment = False
- comment = []
- for t in Lexer(src, origin).tokenize():
- if incomment:
- if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
- content = u''.join(comment)
- translators_comment_start = None
- for lineno, line in enumerate(content.splitlines(True)):
- if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
- translators_comment_start = lineno
- for lineno, line in enumerate(content.splitlines(True)):
- if translators_comment_start is not None and lineno >= translators_comment_start:
- out.write(u' # %s' % line)
- else:
- out.write(u' #\n')
- incomment = False
- comment = []
- else:
- comment.append(t.contents)
- elif intrans:
+ lineno = 1
+ for t in Lexer(fileobj.read(), None).tokenize():
+ lineno += t.contents.count('\n')
+ if intrans:
if t.token_type == TOKEN_BLOCK:
endbmatch = endblock_re.match(t.contents)
pluralmatch = plural_re.match(t.contents)
if endbmatch:
if inplural:
- out.write(' ngettext(%r,%r,count) ' % (''.join(singular), ''.join(plural)))
- for part in singular:
- out.write(blankout(part, 'S'))
- for part in plural:
- out.write(blankout(part, 'P'))
+ yield lineno, 'ngettext', (unicode(''.join(singular)),
+ unicode(''.join(plural))), []
else:
- out.write(' gettext(%r) ' % ''.join(singular))
- for part in singular:
- out.write(blankout(part, 'S'))
+ yield lineno, None, unicode(''.join(singular)), []
intrans = False
inplural = False
singular = []
@@ -52,21 +35,18 @@
elif pluralmatch:
inplural = True
else:
- filemsg = ''
- if origin:
- filemsg = 'file %s, ' % origin
- raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
+ raise SyntaxError('Translation blocks must not include '
+ 'other block tags: %s' % t.contents)
elif t.token_type == TOKEN_VAR:
if inplural:
plural.append('%%(%s)s' % t.contents)
else:
singular.append('%%(%s)s' % t.contents)
elif t.token_type == TOKEN_TEXT:
- contents = t.contents.replace('%', '%%')
if inplural:
- plural.append(contents)
+ plural.append(t.contents)
else:
- singular.append(contents)
+ singular.append(t.contents)
else:
if t.token_type == TOKEN_BLOCK:
imatch = inline_re.match(t.contents)
@@ -74,35 +54,35 @@
cmatches = constant_re.findall(t.contents)
if imatch:
g = imatch.group(1)
- if g[0] == '"': g = g.strip('"')
- elif g[0] == "'": g = g.strip("'")
- out.write(' gettext(%r) ' % g)
+ if g[0] == '"':
+ g = g.strip('"')
+ elif g[0] == "'":
+ g = g.strip("'")
+ yield lineno, None, unicode(g), []
elif bmatch:
for fmatch in constant_re.findall(t.contents):
- out.write(' _(%s) ' % fmatch)
+ yield lineno, None, unicode(fmatch), []
intrans = True
inplural = False
singular = []
plural = []
elif cmatches:
for cmatch in cmatches:
- out.write(' _(%s) ' % cmatch)
- elif t.contents == 'comment':
- incomment = True
- else:
- out.write(blankout(t.contents, 'B'))
+ yield lineno, None, unicode(cmatch), []
elif t.token_type == TOKEN_VAR:
parts = t.contents.split('|')
cmatch = constant_re.match(parts[0])
if cmatch:
- out.write(' _(%s) ' % cmatch.group(1))
+ yield lineno, None, unicode(cmatch.group(1)), []
for p in parts[1:]:
if p.find(':_(') >= 0:
- out.write(' %s ' % p.split(':',1)[1])
- else:
- out.write(blankout(p, 'F'))
- elif t.token_type == TOKEN_COMMENT:
- out.write(' # %s' % t.contents)
- else:
- out.write(blankout(t.contents, 'X'))
- return out.getvalue()
+ p1 = p.split(':',1)[1]
+ if p1[0] == '_':
+ p1 = p1[1:]
+ if p1[0] == '(':
+ p1 = p1.strip('()')
+ if p1[0] == "'":
+ p1 = p1.strip("'")
+ elif p1[0] == '"':
+ p1 = p1.strip('"')
+ yield lineno, None, unicode(p1), []
\ No newline at end of file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment