Created
December 18, 2012 19:04
-
-
Save anonymous/4330910 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
# http://stackoverflow.com/a/9518288/190597 | |
See also http://code.google.com/p/prettytable/ | |
''' | |
import operator | |
import itertools | |
import re | |
import math | |
import functools | |
import utils_iter as ui | |
import logging | |
logger = logging.getLogger(__name__) | |
# Uncomment to see debugging information | |
# logging.basicConfig(level=logging.DEBUG, format='%(message)s') | |
# itertools calls this helper zip_longest on Python 3 and izip_longest on
# Python 2; bind whichever spelling this interpreter provides.
if hasattr(itertools, 'zip_longest'):
    zip_longest = itertools.zip_longest
else:
    zip_longest = itertools.izip_longest
def tableinfo(rows,
              sep = u'─',
              corner = u'·',
              delim = None,
              corner_delim = None,
              prefix = u'│ ',
              postfix = u' │',
              colsep = u' │ ',
              has_header = False,
              header = None,
              separate_rows = False,
              framed = (True, True),
              separate_empty_lines = True,
              justify = 'right',
              wrapfunc = lambda x:x,
              width = None,
              phantom = None,
              **kw):
    '''
    Render rows as a text table and report the column widths that were used.

    rows                  iterable of rows; every cell must already be a text
                          string (table() runs normalize() before calling this)
    sep                   string repeated to draw row-separator lines
    corner                string placed at both ends of a separator line; also
                          substituted for bar characters when corner_delim is
                          derived from delim
    delim                 sequence of column delimiters, cycled; None means
                          colsep between every pair of columns
    corner_delim          sequence of delimiters used inside separator lines,
                          cycled; None derives them from delim via
                          delim_to_corner()
    prefix, postfix       strings put before / after every data line
    colsep                delimiter used when delim is None
    has_header            put a separator after the first row (skipped when
                          separate_rows is true, which separates every row)
    header                optional header row; it is normalized, placed in
                          front of rows, and implies has_header
    separate_rows         put a separator between all logical rows
    framed                (top, bottom) pair: draw a separator above / below
    separate_empty_lines  replace a physical line whose cells are all blank
                          with a separator line
    justify               'left', 'right' or 'center', or a sequence of those
                          names that is cycled across the columns
    wrapfunc              callable applied to every cell, or a sequence of
                          callables applied column by column; wrapped cells
                          are split on newlines into physical lines
    width                 explicit sequence of column widths; None computes
                          the longest cell per column
    phantom               optional sequence of widths; every column uses the
                          larger of its phantom entry and its own width
    **kw                  accepted and ignored, so that option dicts carrying
                          extra keys can be passed straight through

    Returns (text, max_width): the rendered table and the column widths.
    '''
    # Based on: http://code.activestate.com/recipes/267662-table-indentation/
    # Author: http://code.activestate.com/recipes/users/2591466/ (George Sakkis)

    # Python 2 spells the text type `unicode`; Python 3 only has `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str,)

    def row_wrapper(row):
        # Wrap each cell, split it into physical lines, then transpose so
        # every physical line of the logical row becomes one tuple (short
        # cells are padded with empty strings).
        if callable(wrapfunc):
            wrapped = [wrapfunc(item).split('\n') for item in row]
        else:
            # One wrapper per column.  Deciding with callable() instead of
            # catching TypeError keeps a TypeError raised *inside* a wrapper
            # from being mistaken for "wrapfunc is not iterable".
            wrapped = [wrapper(item).split('\n')
                       for (item, wrapper) in zip(row, wrapfunc)]
        return list(zip_longest(*wrapped, fillvalue = u''))

    if header:
        has_header = True
        rows = itertools.chain(normalize([header]), rows)
    logical_rows = [row_wrapper(row) for row in rows]

    if width is None:
        # chain.from_iterable flattens in linear time and, unlike reduce()
        # without an initial value, does not raise on an empty table.
        columns = zip(*itertools.chain.from_iterable(logical_rows))
        max_width = [max(len(item) for item in column) for column in columns]
    else:
        max_width = width
    if phantom is not None:
        max_width = [max(x) for x in zip(phantom, max_width)]

    lcorner = (corner + sep*(len(prefix)-1)) if len(prefix) >= 1 else u''
    rcorner = (sep*(len(postfix)-1) + corner) if len(postfix) >= 1 else u''
    delim = itertools.repeat(colsep) if delim is None else itertools.cycle(delim)
    # NOTE(review): when corner_delim is None the generator below draws from
    # the very same `delim` iterator that later joins the data rows, so a
    # multi-element delim cycle is advanced by however many items
    # ui.iterjoin consumes here -- confirm against ui.iterjoin.
    corner_delim = (delim_to_corner(delim, sep, corner)
                    if corner_delim is None else itertools.cycle(corner_delim))
    # assumes ui.iterjoin(separators, items) interleaves the separators
    # between the items -- TODO confirm against utils_iter
    row_separator = (sep*w for w in max_width)
    row_separator = (lcorner
                     + ''.join(list(ui.iterjoin(corner_delim, row_separator)))
                     + rcorner)

    dispatch = {'center': text_type.center,
                'right': text_type.rjust,
                'left': text_type.ljust}
    if isinstance(justify, string_types):
        justify_names = [justify]
    else:
        justify_names = list(justify)
    justifiers = [dispatch[name.lower()] for name in justify_names]
    # Fix one justifier per column up front.  Sharing a single cycle()
    # iterator across rows makes the alignment drift from row to row
    # whenever the number of names differs from the number of columns.
    column_justify = list(itertools.islice(itertools.cycle(justifiers),
                                           len(max_width)))

    result = []
    for physical_rows in logical_rows:
        row_result = []
        for row in physical_rows:
            if separate_empty_lines and not ''.join(row).strip():
                row_result.append(row_separator)
            else:
                pieces = [justifier(item, w) for (item, w, justifier)
                          in zip(row, max_width, column_justify)]
                row_result.append(
                    prefix
                    + u''.join(list(ui.iterjoin(delim, pieces)))
                    + postfix )
        result.append(u'\n'.join(row_result))
    # The header separator goes right after the first logical row; with no
    # rows at all there is nothing to separate.
    if has_header and not separate_rows and result:
        result.insert(1, row_separator)

    joiner = u'\n'+row_separator+u'\n' if separate_rows else u'\n'
    result = joiner.join(result)
    top_framed, bottom_framed = framed
    if top_framed:
        result = row_separator+u'\n'+result
    if bottom_framed:
        result = result+u'\n'+row_separator
    return result, max_width
def multitable(*datasets, **kwargs):
    '''
    Render several tables, one below the other, at a common total width.

    Positional arguments are the tables (sequences of rows).  Keyword
    arguments:

    kws        sequence of option dicts, cycled so that table i uses
               kws[i % len(kws)] (default: one empty dict).  Each dict is
               passed to tableinfo() and table(); a true 'align' entry marks
               a table whose column edges should line up with the edges of
               the non-aligned tables.
    looseness  extra characters added to the widest table's length
               (default 0)
    verbose    log the intermediate choices at INFO level (default False)

    Algorithm:

    Calculate min_widths
    Calculate total_length
    For all non-aligned rows:
        Expand row to total_length
    Calculate column_locations
    For all aligned rows:
        Enumerate all possible expansions of row to total_length (candidate_rows)
        For each candidate row:
            Record key=(number successfully aligned columns,
                        variation from proportional expansion)
        Sort candidate_rows according to key
        Set row to candidate_rows[0]

    Returns the rendered tables joined by newlines.
    '''
    datasets = [normalize(data) for data in datasets]
    verbose = kwargs.get('verbose', False)
    looseness = kwargs.get('looseness', 0)
    # Pin one option dict per table.  A single shared cycle() iterator would
    # be advanced by every pass below, so whenever len(kws) does not divide
    # len(datasets) each pass would pair a table with a different dict.
    kws = list(itertools.islice(
        itertools.cycle(kwargs.get('kws', [{}])), len(datasets)))

    # Pass 1: natural column widths and the width of each table's
    # decorations (prefix, column delimiters, postfix).
    # NOTE: the fallback decorations measured here are tableinfo()'s
    # defaults while the final rendering uses table()'s defaults; both sets
    # have the same lengths.
    min_widths, delim_widths, total_lengths = [], [], []
    for data, kw in zip(datasets, kws):
        _, min_width = tableinfo(data, **kw)
        min_widths.append(min_width)
        delim = kw.get('delim', None)
        delim = (itertools.repeat(u' │ ') if delim is None
                 else itertools.cycle(delim))
        delim_width = ( [len(kw.get('prefix', u'│ '))]
                        + [len(d) for _, d in zip(min_width[1:], delim)]
                        + [len(kw.get('postfix', u' │'))] )
        delim_widths.append(delim_width)
        total_lengths.append(sum(min_width)+sum(delim_width))
    total_length = max(total_lengths)+looseness
    if verbose:
        logger.info('total_length: {0}'.format(total_length))
        logger.info('min_widths: {0}'.format(min_widths))

    # Pass 2: stretch the non-aligned tables proportionally.
    new_widths = []
    for width, kw, delim_width in zip(min_widths, kws, delim_widths):
        if kw.get('align', False):
            new_widths.append(width)
        else:
            new_widths.append(
                expand_widths(width, total_length-sum(delim_width)))

    # Pass 3: record the column edges of the non-aligned tables.  Even
    # positions of the location tuple are treated as left edges, odd
    # positions as right edges.
    left_column_locations = set()
    right_column_locations = set()
    for width, kw, delim_width in zip(new_widths, kws, delim_widths):
        if not kw.get('align', False):
            loc = width_to_locations(width, delim_width)
            left_column_locations.update(loc[::2])
            right_column_locations.update(loc[1::2])
    if verbose:
        logger.info('left_column_locations: {0}'.format(
            sorted(left_column_locations)))
        logger.info('right_column_locations: {0}'.format(
            sorted(right_column_locations)))

    # Pass 4: for aligned tables pick the expansion that hits the most
    # recorded edges; ties go to the one closest to proportional expansion.
    final_widths = []
    for width, kw, delim_width in zip(new_widths, kws, delim_widths):
        if not kw.get('align', False):
            final_widths.append(width)
            continue
        target_length = total_length-sum(delim_width)
        proportional_width = expand_widths(width, target_length)
        candidates = []
        for candidate_width in enumerate_widths(width, target_length):
            variation = sum((a-b)**2
                            for a, b in zip(candidate_width, proportional_width))
            candidate_locations = width_to_locations(candidate_width, delim_width)
            left_hits = len(left_column_locations.intersection(
                candidate_locations[::2]))
            right_hits = len(right_column_locations.intersection(
                candidate_locations[1::2]))
            hits = left_hits+right_hits
            # -hits so that an ascending sort puts the most hits first.
            candidates.append((candidate_width, -hits, variation))
            if verbose:
                logger.info('candidate: {c}, hits: {h}, var: {v}'.format(
                    c = candidate_locations,
                    h = hits,
                    v = variation))
        candidates.sort(key = lambda x: x[1:])
        if verbose:
            logger.info('winner: {w}'.format(w = candidates[0]))
        final_widths.append(candidates[0][0])

    result = [
        table(data, **dict(kw, width = width))
        for data, width, kw in zip(datasets, final_widths, kws) ]
    return u'\n'.join(result)
def width_to_locations(width, delim_width):
    '''
    Return, as a tuple, ui.cumsum() of ui.iterjoin(width, delim_width).

    multitable() treats the even-indexed entries of the result as left
    column edges and the odd-indexed entries as right column edges.
    '''
    interleaved = ui.iterjoin(width, delim_width)
    running_totals = ui.cumsum(interleaved)
    return tuple(running_totals)
def enumerate_widths(min_width, total_length):
    '''
    Yield candidate lists of column widths derived from min_width.

    One candidate is produced for every grouping returned by
    ui.partition_indices(); each (begin, end) pair of a grouping turns the
    matching minimum width w into end - begin + w - 1.
    '''
    column_count = len(min_width)
    slack = total_length - sum(min_width) + column_count
    for spans in ui.partition_indices(length = slack, groups = column_count):
        candidate = []
        for (begin, end), base in zip(spans, min_width):
            candidate.append(end - begin + base - 1)
        yield candidate
def table(rows,
          sep = u'-',
          corner = u'|',
          delim = [' | '],
          corner_delim = ['-+-'],
          prefix = u'| ',
          postfix = u' |',
          has_header = False,
          header = None,
          separate_rows = False,
          framed = (True, True),
          separate_empty_lines = True,
          justify = 'right',
          wrapfunc = lambda x:x,
          width = None,
          **kw):
    '''
    Format rows as a text table and return it as one string.

    The cells are converted to text with normalize() and the drawing is
    delegated to tableinfo(); the column widths it reports are discarded.

    rows                  A sequence of sequences of items, one sequence per
                          row.
    sep                   Character used for the row separator lines.
    corner                String placed at both ends of a row separator.
    delim                 A sequence of column delimiters, repeated in a
                          cycle.
    corner_delim          A sequence of column delimiters used in row
                          separators, repeated in a cycle.
    prefix                A string prepended to each printed row.
    postfix               A string appended to each printed row.
    has_header            True to put a row separator between the first and
                          second row.
    header                Optional header row placed in front of rows; giving
                          one implies has_header.
    separate_rows         True if all rows are to be separated.
    framed                Pair (top, bottom): True draws a row separator
                          above / below the table.
    separate_empty_lines  Replace empty lines with a row separator.
    justify               'left', 'right' or 'center', or a list of such
                          values (one element per column).
    wrapfunc              A function f(text), or a list of functions (one
                          per column), applied to each element before it is
                          laid out.
    width                 A list of column widths.  If None, the widths are
                          calculated.
    **kw                  Accepted and ignored.
    '''
    # The list defaults above are only read (tableinfo feeds them to
    # itertools.cycle), never mutated.
    options = dict(
        sep = sep,
        corner = corner,
        delim = delim,
        corner_delim = corner_delim,
        prefix = prefix,
        postfix = postfix,
        has_header = has_header,
        header = header,
        separate_rows = separate_rows,
        framed = framed,
        separate_empty_lines = separate_empty_lines,
        justify = justify,
        wrapfunc = wrapfunc,
        width = width,
    )
    rendered, _column_widths = tableinfo(normalize(rows), **options)
    return rendered
def ascii_table(rows,
                sep = '-',
                corner = '+',
                delim = [' | '],
                corner_delim = None,
                prefix = u'| ',
                postfix = u' |',
                has_header = False,
                header = None,
                separate_rows = False,
                framed = (True, True),
                separate_empty_lines = True,
                justify = 'right',
                wrapfunc = lambda x:x,
                width = None,
                **kw):
    '''
    Format rows as a text table drawn with '-', '+' and '|' by default.

    Takes the same arguments as table().  Because corner_delim defaults to
    None here, tableinfo() derives the separator-line delimiters from delim
    with delim_to_corner().  Extra keyword arguments are accepted and
    ignored.  Returns the rendered table as one string.
    '''
    # The list default for delim is only read, never mutated.
    options = dict(
        sep = sep,
        corner = corner,
        delim = delim,
        corner_delim = corner_delim,
        prefix = prefix,
        postfix = postfix,
        has_header = has_header,
        header = header,
        separate_rows = separate_rows,
        framed = framed,
        separate_empty_lines = separate_empty_lines,
        justify = justify,
        wrapfunc = wrapfunc,
        width = width,
    )
    rendered, _column_widths = tableinfo(normalize(rows), **options)
    return rendered
def normalize(rows):
    '''
    Return rows as a list of lists of text strings with tabs expanded.

    Every element is converted with the interpreter's text type (`unicode`
    on Python 2, `str` on Python 3) and then passed through expandtabs().
    '''
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3 has no separate unicode type
    return [[text_type(elt).expandtabs() for elt in row] for row in rows]
def delim_to_corner(delim, sep, corner):
    '''
    Lazily turn data-row delimiters into separator-row delimiters.

    In every delimiter the bar characters (u'│' and u'|') are replaced by
    corner, then '<', ' ' and '>' are each replaced by sep, in that order.
    '''
    substitutions = (
        (u'│', corner),
        (u'|', corner),
        ('<', sep),
        (' ', sep),
        ('>', sep),
    )
    for piece in delim:
        for old, new in substitutions:
            piece = piece.replace(old, new)
        yield piece
def expand_widths(widths, target_size):
    '''
    Scale widths so that they add up to target_size.

    Working left to right, each non-zero width receives its rounded
    proportional share of the size that is still unassigned, measured
    against the sum of the widths from that position onwards; zero widths
    stay zero.  Returns a new list.
    '''
    remaining = target_size
    expanded = []
    for position, current in enumerate(widths):
        if current:
            tail_total = sum(widths[position:])
            share = int(round(float(current)/tail_total*remaining)+0.1)
        else:
            share = 0
        remaining -= share
        expanded.append(share)
    return expanded
def wrap_onspace(text, width):
    '''
    Word-wrap text at spaces, keeping the line breaks it already has.

    The text is split on single spaces.  Each following word is attached
    with a newline instead of a space when the length of the current last
    line plus the length of the word's first line reaches width.  Words are
    never split, so a single long word can exceed width.
    '''
    # written by Mike Brown
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    def attach(so_far, word):
        last_line = so_far[so_far.rfind('\n')+1:]
        first_part = word.split('\n', 1)[0]
        if len(last_line) + len(first_part) >= width:
            glue = '\n'
        else:
            glue = ' '
        return so_far + glue + word

    # split(' ') always yields at least one piece, so reduce() needs no
    # initial value: the first piece seeds the result.
    return functools.reduce(attach, text.split(' '))
def wrap_onspace_strict(text, width):
    '''Similar to wrap_onspace, but enforces the width constraint:
    words longer than width are split.'''
    # Build the pattern with %-formatting: it works on Python 2 and 3,
    # whereas the `unicode` builtin does not exist on Python 3.
    long_word = re.compile(r'\S{%s,}' % (width,))
    # First break every run of at least `width` non-space characters into
    # width-sized chunks, then wrap the result on spaces.
    presplit = long_word.sub(lambda m: wrap_always(m.group(), width), text)
    return wrap_onspace(presplit, width)
def wrap_always(text, width):
    '''A simple word-wrap function that wraps text on exactly width characters.
    It doesn't split the text in words.

    Returns the width-sized chunks of text joined by newlines (the last
    chunk may be shorter).  A width of 0 raises ValueError.
    '''
    # range() with a step exists on Python 2 and 3 (xrange does not exist
    # on Python 3) and avoids the float division / ceil round trip.
    return '\n'.join(text[start:start+width]
                     for start in range(0, len(text), width))
def onspace(width):
    '''Return wrap_onspace with its width argument pre-bound, e.g. for use
    as a wrapfunc.'''
    bound_wrapper = functools.partial(wrap_onspace, width = width)
    return bound_wrapper
def strict(width):
    '''Return wrap_onspace_strict with its width argument pre-bound, e.g.
    for use as a wrapfunc.'''
    bound_wrapper = functools.partial(wrap_onspace_strict, width = width)
    return bound_wrapper
def always(width):
    '''Return wrap_always with its width argument pre-bound, e.g. for use
    as a wrapfunc.'''
    bound_wrapper = functools.partial(wrap_always, width = width)
    return bound_wrapper
def table_array(arr, *args, **kw):
    '''Format arr with table(), using `arr.dtype.names` as the header row.

    Each record of arr contributes one row holding its value for every
    field name.  Extra positional and keyword arguments are forwarded to
    table(); has_header is always passed as True.
    '''
    column_names = arr.dtype.names
    body = [[record[name] for name in column_names] for record in arr]
    return table([column_names] + body, has_header = True, *args, **kw)
def fmt(sequence, digits):
    '''
    Format the numbers in sequence with a fixed number of decimals.

    Every element is rendered with the format spec '.{digits}f'.  Elements
    that do not accept that spec (strings, None, ...) are passed through
    unchanged.  Returns a list.
    '''
    template = '{elt:.{digits}f}'

    def format_one(elt):
        try:
            return template.format(elt = elt, digits = digits)
        except (ValueError, TypeError):
            # Python 3 raises TypeError (not ValueError) for objects such
            # as None that have no float formatting.
            return elt

    # Build a list so Python 2 and 3 agree; map() is a one-shot iterator on
    # Python 3.
    return [format_one(elt) for elt in sequence]
if __name__ == '__main__':
    # Demo: print one table without wrapping, then the same table once per
    # wrapping function with a cell width of 10.
    labels = ('First Name', 'Last Name', 'Age', 'Position')
    data = '''\
John,Smith,24,Software Engineer
Mary,Brohowski,23,Sales Manager
Aristidis,Papageorgopoulos,28,Senior Researcher'''
    rows = [line.strip().split(',') for line in data.splitlines()]
    demo_rows = [labels] + rows

    print('Without wrapping function')
    print(table(demo_rows, has_header = True))

    width = 10
    framing = dict(prefix = u'│ ', postfix = u' │')
    for wrapper in (wrap_always, wrap_onspace, wrap_onspace_strict):
        title = 'Wrapping function: %s(x,width=%d):' % (wrapper.__name__, width)
        print(title)
        wrapped = table(demo_rows, has_header = True, separate_rows = True,
                        wrapfunc = functools.partial(wrapper, width = width),
                        **framing)
        print(wrapped)
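# Sample output (layout only; the exact border characters and the cell justification depend on the keyword defaults in effect):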
# Without wrapping function | |
# ·────────────·──────────────────·─────·───────────────────· | |
# │ First Name │ Last Name │ Age │ Position │ | |
# ·────────────·──────────────────·─────·───────────────────· | |
# │ John │ Smith │ 24 │ Software Engineer │ | |
# │ Mary │ Brohowski │ 23 │ Sales Manager │ | |
# │ Aristidis │ Papageorgopoulos │ 28 │ Senior Researcher │ | |
# ·────────────·──────────────────·─────·───────────────────· | |
# Wrapping function: wrap_always(x,width=10): | |
# ·────────────·────────────·─────·────────────· | |
# │ First Name │ Last Name │ Age │ Position │ | |
# ·────────────·────────────·─────·────────────· | |
# │ John │ Smith │ 24 │ Software E │ | |
# │ │ │ │ ngineer │ | |
# ·────────────·────────────·─────·────────────· | |
# │ Mary │ Brohowski │ 23 │ Sales Mana │ | |
# │ │ │ │ ger │ | |
# ·────────────·────────────·─────·────────────· | |
# │ Aristidis │ Papageorgo │ 28 │ Senior Res │ | |
# │ │ poulos │ │ earcher │ | |
# ·────────────·────────────·─────·────────────· | |
# Wrapping function: wrap_onspace(x,width=10): | |
# ·────────────·──────────────────·─────·────────────· | |
# │ First Name │ Last Name │ Age │ Position │ | |
# ·────────────·──────────────────·─────·────────────· | |
# │ John │ Smith │ 24 │ Software │ | |
# │ │ │ │ Engineer │ | |
# ·────────────·──────────────────·─────·────────────· | |
# │ Mary │ Brohowski │ 23 │ Sales │ | |
# │ │ │ │ Manager │ | |
# ·────────────·──────────────────·─────·────────────· | |
# │ Aristidis │ Papageorgopoulos │ 28 │ Senior │ | |
# │ │ │ │ Researcher │ | |
# ·────────────·──────────────────·─────·────────────· | |
# Wrapping function: wrap_onspace_strict(x,width=10): | |
# ·────────────·────────────·─────·────────────· | |
# │ First Name │ Last Name │ Age │ Position │ | |
# ·────────────·────────────·─────·────────────· | |
# │ John │ Smith │ 24 │ Software │ | |
# │ │ │ │ Engineer │ | |
# ·────────────·────────────·─────·────────────· | |
# │ Mary │ Brohowski │ 23 │ Sales │ | |
# │ │ │ │ Manager │ | |
# ·────────────·────────────·─────·────────────· | |
# │ Aristidis │ Papageorgo │ 28 │ Senior │ | |
# │ │ poulos │ │ Researcher │ | |
# ·────────────·────────────·─────·────────────· | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment