Skip to content

Instantly share code, notes, and snippets.

@charles-l
Created August 21, 2022 01:37
Show Gist options
  • Save charles-l/12a3eb7b364acf322ec607cb610221e2 to your computer and use it in GitHub Desktop.
Save charles-l/12a3eb7b364acf322ec607cb610221e2 to your computer and use it in GitHub Desktop.
# Python implementation of the wordwrap algorithm from
# http://blogs.perl.org/users/damian_conway/2019/08/greed-is-good-balance-is-better-beauty-is-best.html
import math
import re
def cost(lines, width):
# NOTE: not skipping the last line in the uniformity cost since
# if there's no way of splitting in greedy_wrap, the last line ends up
# with everything on it.
uniformity = sum(abs((width - len(l)))**3 for l in lines)
compactness = len(lines)**3
widow_count = sum(1 for line in lines if
re.search(r'[.!?,:;]\s+\S+\s*$', line))
orphan_count = sum(1 for line in lines if
re.search('^\S+[.!?,:;](\s|$)', line))
return uniformity * compactness * (1 + 10 * (widow_count + orphan_count))
def greedy_wrap(text, width=80, minwidth=None, minbreak=5):
lines = []
if minwidth is None:
minwidth = math.floor(0.8 * width)
while text:
if (g := re.match(fr'.{{1,{width}}}$', text)):
lines.append(g.group(0))
text = text[g.end():]
elif (g := re.match(fr'.{{{minwidth},{width}}} ', text)):
lines.append(g.group(0))
text = text[g.end():]
elif ((g := re.match(fr'.{{{minbreak},{width-1}}}', text)) and
re.match(fr'\S{{{minbreak}}}', text[g.end():])):
lines.append(g.group(0) + '-')
text = text[g.end():]
else:
# raku's comb method somehow never runs across this case in the
# examples. I'm not sure if it's doing some crazy backtracking or
# if I ported the patterns wrong, but I'm just punting on it.
lines.append(text)
break
return '\n'.join(lines)
def iterative_wrap(text, width=80):
best_wrapping = text
lowest_cost = float('inf')
prev_max_width = float('inf')
for next_width in range(width, math.floor(0.9*width), -1):
if next_width > prev_max_width:
continue
wrapping = greedy_wrap(text, next_width)
wrap_cost = cost(wrapping.split('\n'), next_width)
if wrap_cost < lowest_cost:
best_wrapping = wrapping
lowest_cost = wrap_cost
prev_max_width = max(len(line) for line in wrapping.split('\n')) - 1
return best_wrapping
print(iterative_wrap('''\
Look you, I shall have to be terminating my interdisciplinary
investigation of consanguineous antidisestablishmentarianism
in Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch.
For I've just been electrophotomicrographically diagnosed with
pseudopneumonoultramicroscopicsilicovolcanoconiosis, isn't it?'''.replace('\n',
' '),
45))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment