Skip to content

Instantly share code, notes, and snippets.

Created December 18, 2012 19:04
Show Gist options
  • Save anonymous/4330910 to your computer and use it in GitHub Desktop.
Save anonymous/4330910 to your computer and use it in GitHub Desktop.
'''
Render rows of data as plain-text tables, with optional per-column wrapping.

Adapted from http://stackoverflow.com/a/9518288/190597
See also http://code.google.com/p/prettytable/
'''
import operator
import itertools
import re
import math
import functools
import utils_iter as ui
import logging
logger = logging.getLogger(__name__)
# Uncomment to see debugging information
# logging.basicConfig(level=logging.DEBUG, format='%(message)s')
# Python 3 names the function zip_longest; Python 2 only provides
# izip_longest.  Prefer the former and fall back to the latter.
zip_longest = getattr(itertools, 'zip_longest', None)
if zip_longest is None:
    zip_longest = itertools.izip_longest
def tableinfo(rows,
              sep = u'─',
              corner = u'·',
              delim = None,
              corner_delim = None,
              prefix = u'│ ',
              postfix = u' │',
              colsep = u' │ ',
              has_header = False,
              header = None,
              separate_rows = False,
              framed = (True, True),
              separate_empty_lines = True,
              justify = 'right',
              wrapfunc = lambda x:x,
              width = None,
              phantom = None,
              **kw):
    '''
    Render `rows` as a text table and report the column widths that were used.

    rows                 -- iterable of rows; every cell must be a string
                            (each cell is passed to the wrapper and then
                            split on newlines).
    sep                  -- string repeated to draw row separator lines.
    corner               -- string used at both ends of a separator line and,
                            when corner_delim is None, in place of the
                            vertical bars of the column delimiters.
    delim                -- sequence of column delimiters, cycled across the
                            columns of every row; None means `colsep`
                            between every pair of columns.
    corner_delim         -- sequence of delimiters used inside separator
                            lines (cycled); None derives them from the column
                            delimiters with delim_to_corner.
    prefix, postfix      -- strings put before / after every data line.
    colsep               -- delimiter used when `delim` is None.
    has_header           -- if true (and separate_rows is false) a separator
                            is placed after the first row.
    header               -- optional header row; when truthy it is normalized,
                            prepended to `rows`, and has_header is forced on.
    separate_rows        -- put a separator between all rows.
    framed               -- pair (top, bottom) selecting the outer separators.
    separate_empty_lines -- replace a line whose cells are all blank with a
                            separator line.
    justify              -- 'left', 'right' or 'center', or a sequence of
                            such values cycled across the columns.
    wrapfunc             -- a callable f(text) applied to every cell, or an
                            iterable of callables (one per column).
    width                -- explicit column widths; None computes the width
                            of the longest line in each column.
    phantom              -- optional widths; each column becomes at least as
                            wide as the matching phantom entry.
    **kw                 -- accepted and ignored, so option dicts that carry
                            extra keys can be passed through.

    Returns a tuple (text, max_width).

    The delimiter and justification cycles restart at the first entry for
    every line, so a `delim`/`justify` sequence whose length differs from the
    number of columns no longer drifts from row to row.
    '''
    # Based on: http://code.activestate.com/recipes/267662-table-indentation/
    # Author: http://code.activestate.com/recipes/users/2591466/ (George Sakkis)

    # wrapfunc is either a single callable or an iterable of per-column
    # callables.  Decide which once, so a TypeError raised *inside* a wrapper
    # is not mistaken for "wrapfunc is not iterable".
    try:
        wrappers = list(wrapfunc)
    except TypeError:
        wrappers = None

    def row_wrapper(row):
        # One logical row becomes one or more physical lines; short cells
        # are padded with empty strings.
        if wrappers is None:
            cells = [wrapfunc(item).split('\n') for item in row]
        else:
            cells = [wrapper(item).split('\n')
                     for (item, wrapper) in zip(row, wrappers)]
        return list(zip_longest(*cells, fillvalue = u''))

    delim_seq = None if delim is None else tuple(delim)

    def fresh_delims():
        # A new delimiter iterator per line keeps every line in phase.
        if delim_seq is None:
            return itertools.repeat(colsep)
        return itertools.cycle(delim_seq)

    if header:
        has_header = True
        rows = itertools.chain(normalize([header]), rows)
    logical_rows = [row_wrapper(row) for row in rows]

    if width is None:
        # Flatten to physical lines in linear time, then transpose.
        physical_lines = itertools.chain.from_iterable(logical_rows)
        max_width = [max(len(item) for item in column)
                     for column in zip(*physical_lines)]
    else:
        max_width = width
    if phantom is not None:
        max_width = [max(x) for x in zip(phantom, max_width)]

    lcorner = corner + sep*(len(prefix)-1) if len(prefix) >= 1 else u''
    rcorner = sep*(len(postfix)-1) + corner if len(postfix) >= 1 else u''
    if corner_delim is None:
        corner_delims = delim_to_corner(fresh_delims(), sep, corner)
    else:
        corner_delims = itertools.cycle(corner_delim)
    # NOTE(review): ui.iterjoin(joiners, items) is assumed to interleave the
    # items with successive joiners -- confirm against utils_iter.
    row_separator = (lcorner
                     + ''.join(ui.iterjoin(corner_delims,
                                           (sep*w for w in max_width)))
                     + rcorner)

    # Justification is dispatched by method name so it works for whichever
    # string type the cells have (no reference to the Python 2 `unicode`).
    methods = {'center': 'center', 'right': 'rjust', 'left': 'ljust'}
    try:
        column_methods = [methods[item] for item in justify]
    except KeyError:
        # A plain string such as 'right' ends up here (its characters are
        # not valid keys); it applies to every column.
        column_methods = [methods[justify.lower()]]

    result = []
    header_pending = has_header and not separate_rows
    for physical_rows in logical_rows:
        row_result = []
        for row in physical_rows:
            if separate_empty_lines and not ''.join(row).strip():
                row_result.append(row_separator)
            else:
                pieces = [getattr(item, method)(w)
                          for (item, w, method)
                          in zip(row, max_width,
                                 itertools.cycle(column_methods))]
                row_result.append(
                    prefix
                    + u''.join(ui.iterjoin(fresh_delims(), pieces))
                    + postfix)
        result.append(u'\n'.join(row_result))
        if header_pending:
            # Only the first logical row is treated as the header.
            result.append(row_separator)
            header_pending = False

    joiner = u'\n'+row_separator+u'\n' if separate_rows else u'\n'
    result = joiner.join(result)
    top_framed, bottom_framed = framed
    if top_framed:
        result = row_separator+u'\n'+result
    if bottom_framed:
        result = result+u'\n'+row_separator
    return result, max_width
def multitable(*datasets, **kwargs):
    '''
    Render several datasets as tables of equal total length, stacked
    vertically, and return the result as one string.

    Positional arguments are the datasets (each a sequence of rows).
    Keyword arguments:
        kws       -- list of option dicts, cycled over the datasets; each
                     dict is passed to tableinfo/table.  A truthy 'align'
                     entry marks that dataset as "aligned" (see below).
        looseness -- extra characters added to the common total length.
        verbose   -- log the intermediate widths/locations via `logger`.

    Algorithm:
        Calculate min_widths
        Calculate total_length
        For all non-aligned rows:
            Expand row to total_length
        Calculate column_locations
        For all aligned rows:
            Enumerate all possible expansions of row to total_length
            (candidate_rows)
            For each candidate row:
                Record key=(number successfully aligned columns,
                            variation from proportional expansion)
            Sort candidate_rows according to key
            Set row to candidate_rows[0]
    '''
    datasets = [normalize(data) for data in datasets]
    verbose = kwargs.get('verbose', False)
    looseness = kwargs.get('looseness', 0)
    # Pin exactly one option dict to each dataset.  Sharing a single cycle
    # iterator between the passes below would pair datasets with different
    # dicts in different passes whenever len(kws) does not divide
    # len(datasets).
    kws = list(itertools.islice(
        itertools.cycle(kwargs.get('kws', [{}])), len(datasets)))

    # Pass 1: minimal column widths and the width taken by the decorations.
    min_widths, delim_widths, total_lengths = [], [], []
    for data, kw in zip(datasets, kws):
        result, min_width = tableinfo(data, **kw)
        min_widths.append(min_width)
        delim = kw.get('delim', None)
        delim = (itertools.repeat(u' │ ') if delim is None
                 else itertools.cycle(delim))
        delim_width = ( [len(kw.get('prefix', u'│ '))]
                        + [len(d) for _, d in zip(min_width[1:], delim)]
                        + [len(kw.get('postfix', u' │'))] )
        delim_widths.append(delim_width)
        total_lengths.append(sum(min_width)+sum(delim_width))
    total_length = max(total_lengths)+looseness
    if verbose:
        logger.info('total_length: {0}'.format(total_length))
        logger.info('min_widths: {0}'.format(min_widths))

    # Pass 2: non-aligned tables are simply stretched proportionally.
    new_widths = []
    for width, kw, delim_width in zip(min_widths, kws, delim_widths):
        if kw.get('align', False):
            new_widths.append(width)
        else:
            new_widths.append(
                expand_widths(width, total_length-sum(delim_width)))

    # Pass 3: collect the column edges of the non-aligned tables.
    left_column_locations = set()
    right_column_locations = set()
    for width, kw, delim_width in zip(new_widths, kws, delim_widths):
        if not kw.get('align', False):
            loc = width_to_locations(width, delim_width)
            left_column_locations = left_column_locations.union(loc[::2])
            right_column_locations = right_column_locations.union(loc[1::2])
    if verbose:
        logger.info('left_column_locations: {0}'.format(
            sorted(left_column_locations)))
        logger.info('right_column_locations: {0}'.format(
            sorted(right_column_locations)))

    # Pass 4: for aligned tables pick the expansion that hits the most
    # collected edges, breaking ties by closeness to proportional expansion.
    final_widths = []
    for width, kw, delim_width in zip(new_widths, kws, delim_widths):
        if not kw.get('align', False):
            final_widths.append(width)
            continue
        candidates = []
        target_length = total_length-sum(delim_width)
        proportional_width = expand_widths(width, target_length)
        for candidate_width in enumerate_widths(width, target_length):
            variation = sum((a-b)**2
                            for a, b in zip(candidate_width,
                                            proportional_width))
            candidate_locations = width_to_locations(
                candidate_width, delim_width)
            left_hits = len(left_column_locations.intersection(
                candidate_locations[::2]))
            right_hits = len(right_column_locations.intersection(
                candidate_locations[1::2]))
            hits = left_hits+right_hits
            candidates.append((candidate_width, -hits, variation))
            if verbose:
                logger.info('candidate: {c}, hits: {h}, var: {v}'.format(
                    c = candidate_locations,
                    h = hits,
                    v = variation))
        candidates.sort(key = lambda x: x[1:])
        if verbose:
            logger.info('winner: {w}'.format(w = candidates[0]))
        final_widths.append(candidates[0][0])

    result = [
        table(data, **dict(kw, width = width))
        for data, width, kw in zip(datasets, final_widths, kws) ]
    return u'\n'.join(result)
def width_to_locations(width, delim_width):
    '''
    Return, as a tuple, the running totals produced by ui.cumsum over
    ui.iterjoin(width, delim_width).

    NOTE(review): presumably these are the character offsets of the column
    edges (callers treat even positions as left edges and odd positions as
    right edges) -- confirm against utils_iter.
    '''
    interleaved = ui.iterjoin(width, delim_width)
    running_totals = ui.cumsum(interleaved)
    return tuple(running_totals)
def enumerate_widths(min_width, total_length):
    '''
    Generate candidate lists of column widths, one list per item yielded by
    ui.partition_indices.

    Each candidate holds one width per entry of `min_width`, computed as
    (end - begin) + min_width - 1 for the matching (begin, end) pair.

    NOTE(review): presumably every candidate sums to `total_length` and no
    column shrinks below its minimum -- this depends on the contract of
    ui.partition_indices; confirm against utils_iter.
    '''
    n_columns = len(min_width)
    # Slack to distribute, plus one unit per column (removed again below).
    diff = total_length - sum(min_width) + n_columns
    for indices in ui.partition_indices(length = diff, groups = n_columns):
        candidate = []
        for (begin, end), width in zip(indices, min_width):
            candidate.append(end - begin + width - 1)
        yield candidate
def table(rows,
          sep = u'-',
          corner = u'|',
          delim = [' | '],
          corner_delim = ['-+-'],
          prefix = u'| ',
          postfix = u' |',
          has_header = False,
          header = None,
          separate_rows = False,
          framed = (True, True),
          separate_empty_lines = True,
          justify = 'right',
          wrapfunc = lambda x:x,
          width = None,
          **kw):
    '''
    Return `rows` rendered as a text table (a single string).

    The rows are passed through normalize() and then handed to tableinfo();
    only the rendered text is returned.

    rows                 -- A sequence of sequences of items, one sequence
                            per row.
    sep                  -- Character to be used for the row separator line
                            (if has_header==True or separate_rows==True).
    corner               -- String used at the ends of a row separator line.
    delim                -- A sequence of column delimiters. The delimiters
                            are repeated in a cycle.
    corner_delim         -- A sequence of column delimiters used in row
                            separators, repeated in a cycle.
    prefix               -- A string prepended to each printed row.
    postfix              -- A string appended to each printed row.
    has_header           -- True if there is a row separator between the
                            first and second row.
    header               -- Optional header row, forwarded to tableinfo().
    separate_rows        -- True if all rows are to be separated.
    framed               -- Pair (top, bottom); True if top (and/or bottom)
                            have a row separator.
    separate_empty_lines -- Replace empty lines with a row separator.
    justify              -- Determines how the data is justified in each
                            column. Valid values are 'left', 'right' and
                            'center', or a list of such values (one element
                            per column).
    wrapfunc             -- A function f(text), or list of functions, for
                            wrapping text; each element in the table is
                            first wrapped by this function. If wrapfunc is a
                            list of functions, then table will apply one
                            function per column.
    width                -- A list of column widths. If None, the widths
                            will be calculated.
    **kw                 -- Accepted but not forwarded.
    '''
    options = dict(
        sep = sep,
        corner = corner,
        delim = delim,
        corner_delim = corner_delim,
        prefix = prefix,
        postfix = postfix,
        has_header = has_header,
        header = header,
        separate_rows = separate_rows,
        framed = framed,
        separate_empty_lines = separate_empty_lines,
        justify = justify,
        wrapfunc = wrapfunc,
        width = width,
    )
    rendered, _widths = tableinfo(normalize(rows), **options)
    return rendered
def ascii_table(rows,
                sep = '-',
                corner = '+',
                delim = [' | '],
                corner_delim = None,
                prefix = u'| ',
                postfix = u' |',
                has_header = False,
                header = None,
                separate_rows = False,
                framed = (True, True),
                separate_empty_lines = True,
                justify = 'right',
                wrapfunc = lambda x:x,
                width = None,
                **kw):
    '''
    Return `rows` rendered as a text table (a single string).

    Takes the same options as table().  The defaults here use '+' as the
    corner and leave corner_delim as None, so tableinfo() derives the
    separator-line delimiters from `delim`.  Extra keyword arguments in
    **kw are accepted but not forwarded.
    '''
    options = dict(
        sep = sep,
        corner = corner,
        delim = delim,
        corner_delim = corner_delim,
        prefix = prefix,
        postfix = postfix,
        has_header = has_header,
        header = header,
        separate_rows = separate_rows,
        framed = framed,
        separate_empty_lines = separate_empty_lines,
        justify = justify,
        wrapfunc = wrapfunc,
        width = width,
    )
    rendered, _widths = tableinfo(normalize(rows), **options)
    return rendered
def normalize(rows):
    '''
    Return a new list of rows in which every element has been converted to
    text and had its tabs expanded to spaces.

    Works on both Python 2 (where the text type is `unicode`) and Python 3
    (where it is `str`).
    '''
    try:
        text_type = unicode  # noqa: F821 -- Python 2 only
    except NameError:
        text_type = str
    return [[text_type(elt).expandtabs() for elt in row] for row in rows]
def delim_to_corner(delim, sep, corner):
    '''
    Lazily turn column delimiters into the delimiters used on separator
    lines: vertical bars become `corner`, and '<', ' ' and '>' become `sep`.
    '''
    for piece in delim:
        for bar in (u'│', u'|'):
            piece = piece.replace(bar, corner)
        for filler in (u'<', u' ', u'>'):
            piece = piece.replace(filler, sep)
        yield piece
def expand_widths(widths, target_size):
    '''
    Scale `widths` proportionally so that they add up to `target_size`.

    Each column receives its share of what is still to be distributed,
    rounded to an integer; the share is then removed from the remaining
    target so rounding errors do not accumulate.  Zero-width columns stay
    zero.  Returns a new list.
    '''
    widths = list(widths)
    # Sum of the widths not yet processed, kept as a running total instead
    # of re-summing the tail of the list on every iteration.
    remaining = sum(widths)
    new_widths = []
    for w in widths:
        new_width = int(round(float(w)/remaining*target_size)+0.1) if w else 0
        target_size -= new_width
        remaining -= w
        new_widths.append(new_width)
    return new_widths
def wrap_onspace(text, width):
    # written by Mike Brown
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    '''
    A word-wrap function that preserves existing line breaks
    and most spaces in the text. Expects that existing line
    breaks are posix newlines (\\n).

    The text is split on single spaces; a word is moved to a new line when
    the length of the current line plus the length of the word (up to its
    first embedded newline) reaches `width`.  Words longer than `width` are
    not broken.
    '''
    words = text.split(' ')
    first = words[0]
    pieces = [first]
    # Length of the current (last) output line, tracked incrementally so the
    # accumulated text never has to be rescanned.
    current = len(first) - (first.rfind('\n') + 1)
    for word in words[1:]:
        first_break = word.find('\n')
        head_len = len(word) if first_break < 0 else first_break
        if current + head_len >= width:
            pieces.append('\n')
            current = 0
        else:
            pieces.append(' ')
            current += 1
        pieces.append(word)
        last_break = word.rfind('\n')
        if last_break < 0:
            current += len(word)
        else:
            # The word carries its own line break(s): the current line is
            # whatever follows the last one.
            current = len(word) - (last_break + 1)
    return ''.join(pieces)
def wrap_onspace_strict(text, width):
    '''Similar to wrap_onspace, but enforces the width constraint:
    words longer than width are split.

    Every run of at least `width` non-space characters is first broken up
    with wrap_always; the result is then wrapped with wrap_onspace.
    '''
    # str() instead of the Python 2-only unicode() keeps this working on
    # Python 3 as well.
    word_pat = re.compile(r'\S{' + str(width) + r',}')
    pre_split = word_pat.sub(lambda m: wrap_always(m.group(), width), text)
    return wrap_onspace(pre_split, width)
def wrap_always(text, width):
    '''A simple word-wrap function that wraps text on exactly width characters.
    It doesn't split the text in words.

    Returns the chunks joined with newlines; an empty text gives an empty
    string.  `width` is expected to be a positive integer (zero raises
    ZeroDivisionError).
    '''
    # Integer ceiling division: no float rounding, and no Python 2-only
    # xrange.
    n_chunks = -(-len(text) // width)
    return '\n'.join(text[width*i:width*(i+1)] for i in range(n_chunks))
def onspace(width):
    '''Return wrap_onspace with its `width` argument pre-bound.'''
    wrapper = functools.partial(wrap_onspace, width = width)
    return wrapper
def strict(width):
    '''Return wrap_onspace_strict with its `width` argument pre-bound.'''
    wrapper = functools.partial(wrap_onspace_strict, width = width)
    return wrapper
def always(width):
    '''Return wrap_always with its `width` argument pre-bound.'''
    wrapper = functools.partial(wrap_always, width = width)
    return wrapper
def table_array(arr, *args, **kw):
    '''table_array automatically labels the columns with `dtype.names`

    `arr` must expose `dtype.names` and yield rows that can be indexed by
    field name.  The field names form the first (header) row; remaining
    positional and keyword arguments are forwarded to table().
    `has_header` defaults to True but may be overridden by the caller.

    Raises TypeError if `arr.dtype.names` is None.
    '''
    fields = arr.dtype.names
    if fields is None:
        raise TypeError(
            'table_array requires an array with named fields '
            '(arr.dtype.names is None)')
    data = [fields]
    data.extend([row[field] for field in fields] for row in arr)
    # setdefault instead of a hard-coded keyword: passing has_header
    # explicitly no longer fails with "multiple values for keyword argument".
    kw.setdefault('has_header', True)
    return table(data, *args, **kw)
def fmt(sequence, digits):
    '''
    Format every element of `sequence` as a fixed-point number with
    `digits` decimals and return the results as a list.

    Elements that cannot be formatted that way (for example strings or
    None) are passed through unchanged.
    '''
    spec = '.{0}f'.format(digits)

    def format_one(elt):
        try:
            return format(elt, spec)
        except (ValueError, TypeError):
            # Python 3 raises TypeError (not ValueError) for objects such as
            # None that do not support a format spec.
            return elt

    # A real list on every Python version (map() is lazy on Python 3).
    return [format_one(elt) for elt in sequence]
if __name__ == '__main__':
    # Small demonstration: the same data rendered without wrapping and then
    # with each of the three wrapping functions.
    labels = ('First Name', 'Last Name', 'Age', 'Position')
    data = '''\
John,Smith,24,Software Engineer
Mary,Brohowski,23,Sales Manager
Aristidis,Papageorgopoulos,28,Senior Researcher'''
    rows = [line.strip().split(',') for line in data.splitlines()]
    labelled_rows = [labels] + rows

    print('Without wrapping function')
    print(table(labelled_rows, has_header = True))

    width = 10
    for wrapper in (wrap_always, wrap_onspace, wrap_onspace_strict):
        def wrap_cell(x):
            # Called only during this iteration, so `wrapper` is the
            # function currently being demonstrated.
            return wrapper(x, width)
        print('Wrapping function: %s(x,width=%d):' % (wrapper.__name__, width))
        rendered = table(labelled_rows,
                         has_header = True,
                         separate_rows = True,
                         prefix = u'│ ',
                         postfix = u' │',
                         wrapfunc = wrap_cell)
        print(rendered)
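# Sample output.  NOTE: it was captured with box-drawing characters for the
# separator, corner and delimiters; table()'s ASCII defaults ('-', '|', ' | ',
# '-+-') print ASCII characters for most of these instead.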
# Without wrapping function
# ·────────────·──────────────────·─────·───────────────────·
# │ First Name │ Last Name │ Age │ Position │
# ·────────────·──────────────────·─────·───────────────────·
# │ John │ Smith │ 24 │ Software Engineer │
# │ Mary │ Brohowski │ 23 │ Sales Manager │
# │ Aristidis │ Papageorgopoulos │ 28 │ Senior Researcher │
# ·────────────·──────────────────·─────·───────────────────·
# Wrapping function: wrap_always(x,width=10):
# ·────────────·────────────·─────·────────────·
# │ First Name │ Last Name │ Age │ Position │
# ·────────────·────────────·─────·────────────·
# │ John │ Smith │ 24 │ Software E │
# │ │ │ │ ngineer │
# ·────────────·────────────·─────·────────────·
# │ Mary │ Brohowski │ 23 │ Sales Mana │
# │ │ │ │ ger │
# ·────────────·────────────·─────·────────────·
# │ Aristidis │ Papageorgo │ 28 │ Senior Res │
# │ │ poulos │ │ earcher │
# ·────────────·────────────·─────·────────────·
# Wrapping function: wrap_onspace(x,width=10):
# ·────────────·──────────────────·─────·────────────·
# │ First Name │ Last Name │ Age │ Position │
# ·────────────·──────────────────·─────·────────────·
# │ John │ Smith │ 24 │ Software │
# │ │ │ │ Engineer │
# ·────────────·──────────────────·─────·────────────·
# │ Mary │ Brohowski │ 23 │ Sales │
# │ │ │ │ Manager │
# ·────────────·──────────────────·─────·────────────·
# │ Aristidis │ Papageorgopoulos │ 28 │ Senior │
# │ │ │ │ Researcher │
# ·────────────·──────────────────·─────·────────────·
# Wrapping function: wrap_onspace_strict(x,width=10):
# ·────────────·────────────·─────·────────────·
# │ First Name │ Last Name │ Age │ Position │
# ·────────────·────────────·─────·────────────·
# │ John │ Smith │ 24 │ Software │
# │ │ │ │ Engineer │
# ·────────────·────────────·─────·────────────·
# │ Mary │ Brohowski │ 23 │ Sales │
# │ │ │ │ Manager │
# ·────────────·────────────·─────·────────────·
# │ Aristidis │ Papageorgo │ 28 │ Senior │
# │ │ poulos │ │ Researcher │
# ·────────────·────────────·─────·────────────·
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment