Skip to content

Instantly share code, notes, and snippets.

@florentx
Created September 6, 2010 23:14
Show Gist options
  • Save florentx/567619 to your computer and use it in GitHub Desktop.
Save florentx/567619 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""String formatting for Python 2.5.
This is an implementation of the new string formatting (PEP 3101).
Most PEP 3101 features are implemented.
TODO:
- alignment '='
- thousand separator
"""
import re
# Tokenizer for a whole format string.  Group 1 is either a run of escaped
# braces or one complete replacement field (with at most one level of nested
# fields inside it).
#
# Two deliberate details in the replacement-field branch:
# - the first character after '{' may be neither '{' nor '}', so "{}}}" is
#   read as the field "{}" followed by an escaped '}' instead of a field
#   named '}';
# - the inner repetition consumes one character per iteration (no nested
#   '+' inside '*'), so an unterminated "{aaaa..." fails in linear time
#   instead of backtracking exponentially.
FORMAT_STR = re.compile(
    r'((?<!{)(?:{{)+'                       # run of '{{'
    r'|(?:}})+(?!})'                        # run of '}}'
    r'|{(?:[^{}](?:[^{}]|{[^{}]*})*)?})'    # replacement field
)

# A replacement field nested inside a format specification.
FORMAT_SUB = re.compile(r'({[^{}]*})')

# Format specification mini-language:
#   [[fill]align][sign][#][width][,][.precision][type]
# A leading '0' of the width is captured as part of the width group.
FORMAT_SPEC = re.compile(
    r'((?:[^{}]?[<>=^])?)'          # [fill] + alignment
    r'([-+ ]?)'                     # sign
    r'(#?)' r'(\d*)' r'(,?)'        # base prefix, minimal width, thousands sep
    r'((?:\.\d+)?)'                 # precision
    r'([bcdefgnosxEFGX%]?)$'        # type
)

# Components of a field name: the leading name (or a '.attribute' step) or an
# '[index]' step.  Raw string, so the backslashes reach the regex engine
# instead of being (invalid) string escapes.
FIELD_PART = re.compile(r'((?:^|\.)[^\[.]+|\[[^]]+\])')
def _strformat(value, format_spec=""):
    """Format *value* according to a PEP 3101 format specification.

    The value is first rendered with the old ``%`` operator (sign, ``#``
    prefix, precision and type), and the resulting text is then padded to
    the requested width.  Padding after conversion means fill and alignment
    work for any value type and never interfere with the precision.

    Arguments:
      value       -- the object to format.
      format_spec -- ``[[fill]align][sign][#][0][width][,][.precision][type]``.

    Returns the formatted string.  When *format_spec* does not match the
    mini-language, ``str(value)`` is returned unchanged.

    Limitations: the thousands separator (``,``) is parsed but ignored, and
    the conversion is delegated to the ``%`` operator, so types it does not
    know (``b``, ``n``) are rejected by that operator.
    """
    m = FORMAT_SPEC.match(format_spec)
    if not m:
        return str(value)
    align, sign, prefix, width, comma, precision, conversion = m.groups()
    zero_padded = width.startswith('0')
    width = int(width or 0)
    fill, align = align[:-1], align[-1:]
    numeric = hasattr(value, '__float__')

    if zero_padded:
        # A leading '0' in the width means "pad with zeros"; for numbers the
        # zeros go between the sign and the digits unless an explicit
        # alignment says otherwise.
        fill = fill or '0'
        if not align and numeric:
            align = '='
    if not fill:
        fill = ' '
    if not align:
        # numeric values are right aligned by default
        if numeric:
            align = '>'
        else:
            align = '<'

    # NOTE: `comma` (thousands separator) is accepted but not implemented.

    flags = ''
    if prefix:
        flags += '#'
    if sign != '-':
        # '-' is the default behaviour of the % operator; as a % flag it
        # would mean "left justify", so it must not be forwarded.
        flags += sign

    # The value is wrapped in a 1-tuple so that a tuple value is formatted
    # as a whole instead of being unpacked as several % arguments.
    if conversion == '%':
        # Percentage: scale by 100, fixed-point notation, trailing '%'.
        text = ('%' + flags + precision + 'f') % (value * 100,) + '%'
    else:
        text = ('%' + flags + precision + (conversion or 's')) % (value,)

    missing = width - len(text)
    if missing <= 0:
        return text
    if align == '<':
        return text + fill * missing
    if align == '^':
        # an odd amount of padding puts the extra character on the right
        left = missing // 2
        return fill * left + text + fill * (missing - left)
    if align == '=':
        # keep the sign (and a '0x'-style base prefix) ahead of the padding
        head = 0
        if text[:1] in ('+', '-', ' '):
            head = 1
        if prefix and text[head:head + 2].lower() in ('0x', '0o', '0b'):
            head += 2
        return text[:head] + fill * missing + text[head:]
    return fill * missing + text
def _format_value(value, parts, conv, spec):
for part in parts:
if part.startswith('.'):
value = getattr(value, part[1:])
else:
key = part[1:-1]
if key.isdigit():
value = value[int(key)]
else:
value = value[key]
if conv:
value = ['%s', '%r'][conv == 'r'] % value
if hasattr(value, '__format__'):
value = value.__format__(spec)
elif hasattr(value, 'strftime') and spec:
value = value.strftime(str(spec))
else:
value = _strformat(value, spec)
return value
class StrRepl(object):
    """A PEP 3101 format string parsed once and ready to be rendered.

    Parsing rewrites the format string into an old-style ``%(key)s``
    template (``string``) in which every replacement field is keyed by the
    ``id`` of the tuple describing it.  ``format`` computes the text of each
    field and renders the template with the ``%`` operator.
    """
    # index   -- number of automatically numbered fields seen so far
    # indexes -- explicit positional indexes while parsing; afterwards the
    #            sorted list of positional indexes the string refers to
    # kwords  -- field name -> [(lookup parts, conversion, spec), ...]
    # nested  -- same as kwords, for fields whose spec has nested fields
    # string  -- the rewritten '%(key)s' template
    __slots__ = 'index', 'indexes', 'kwords', 'nested', 'string'

    def __init__(self, format_string):
        """Parse *format_string*.

        Raises ValueError when the string mixes automatic field numbering
        with explicit positional indexes.
        """
        self.index = 0
        self.indexes = set()
        self.kwords = {}
        self.nested = {}
        # The template is rendered with the % operator, so every literal '%'
        # has to be doubled first; prepare() undoes the doubling inside
        # replacement fields.
        self.string = FORMAT_STR.sub(self.prepare,
                                     format_string.replace('%', '%%'))
        if self.indexes:
            self.indexes = sorted(self.indexes)
        else:
            self.indexes = list(range(self.index))

    def prepare(self, match):
        """Regex callback: return the template text for one matched token.

        Runs of escaped braces are halved; a replacement field is recorded
        in ``kwords`` or ``nested`` and replaced by its ``%(key)s``
        placeholder.
        """
        part = match.group(0)
        if part[0] == part[-1]:
            # '{{' or '}}'
            assert part == part[0] * len(part)
            return part[:len(part) // 2]
        # Undo the '%' doubling applied to the text being scanned.
        repl = part[1:-1].replace('%%', '%')
        field, _, format_spec = repl.partition(':')
        literal, _, conversion = field.partition('!')
        name_parts = FIELD_PART.findall(literal)
        if not name_parts or name_parts[0].startswith(('.', '[')):
            name = ''
        else:
            name = name_parts.pop(0)
        if not name:
            if not self.index and self.indexes:
                raise ValueError(
                    'cannot switch from manual field specification '
                    'to automatic field numbering')
            name = str(self.index)
            self.index += 1
        elif name.isdigit():
            if self.index:
                raise ValueError(
                    'cannot switch from automatic field numbering '
                    'to manual field specification')
            self.indexes.add(int(name))
        if '{' in format_spec:
            # The spec itself becomes a %-template (filled in by format()),
            # so its literal '%' characters are doubled before the nested
            # fields are replaced by placeholders.
            format_spec = FORMAT_SUB.sub(self.prepare,
                                         format_spec.replace('%', '%%'))
            rv = (name_parts, conversion, format_spec)
            self.nested.setdefault(name, []).append(rv)
        else:
            rv = (name_parts, conversion, format_spec)
            self.kwords.setdefault(name, []).append(rv)
        # rv stays referenced from kwords/nested, so its id stays unique.
        return r'%%(%s)s' % id(rv)

    def format(self, *args, **kwargs):
        """Render the parsed string with the given arguments.

        Positional arguments are bound to the index that names them, so
        "{0} {2}" uses the first and the third argument.  Raises KeyError
        when a field refers to an argument that was not supplied.
        """
        for i in self.indexes:
            if i < len(args):
                kwargs[str(i)] = args[i]
        params = {}
        # Plain fields first: nested specs refer to their results.
        for name, items in self.kwords.items():
            value = kwargs[name]
            for item in items:
                parts, conv, spec = item
                params[str(id(item))] = _format_value(value, parts, conv, spec)
        for name, items in self.nested.items():
            value = kwargs[name]
            for item in items:
                parts, conv, spec = item
                spec = spec % params
                params[str(id(item))] = _format_value(value, parts, conv, spec)
        return self.string % params
def string_format(string, *args, **kwargs):
    """Render *string*, written in the new format syntax, with the arguments.

    Positional and keyword arguments are handed unchanged to the parsed
    format string; the formatted text is returned.
    """
    template = StrRepl(string)
    return template.format(*args, **kwargs)
def test():
    """Self-test covering nested specs, attribute lookup and strftime specs."""
    import datetime
    f = string_format
    # Width 8 with precision 5: the five kept characters are padded with
    # three spaces.
    assert f(u"{0:{width}.{precision}s}", 'hello world',
             width=8, precision=5) == u'hello   '
    d = datetime.date(2010, 9, 7)
    assert f(u"The year is {0.year}", d) == u"The year is 2010"
    assert f(u"Tested on {0:%Y-%m-%d}", d) == u"Tested on 2010-09-07"
    # Call form: prints the same text on Python 2 and is valid on Python 3.
    print('Test successful')
# Run the self-test only when this file is executed as a script.
if __name__ == '__main__':
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment