Skip to content

Instantly share code, notes, and snippets.

@richlowe
Created February 27, 2011 03:37
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save richlowe/845888 to your computer and use it in GitHub Desktop.
roff(1) Line Mangler And Organizer
#!/usr/bin/env python
# roff(1) Line Mangler And Organizer
#
# Reflow a roff (source) document without damaging it:
#
# - Comments are left intact
# - Commands are left intact
# - Known "verbatim blocks" are left intact:
#   - tbl(1) tables (.TS ... .TE)
#   - Unfilled sections (.nf ... .fi)
import sys
import textwrap
# Map of verbatim-block start command --> the command that ends the block.
VERBATIM_BLOCKS = {
    '.TS': '.TE',  # Table Start / Table End
    '.nf': '.fi',  # No-fill / Fill
}

# line-tokens
#
# COMMENT  - roff comment, .\" or '\"
#
# TEXT     - plain text
#
# COMMAND  - a roff command (must begin a line)
#
# VERBATIM - Part of a verbatim block (see VERBATIM_BLOCKS)
#
# BLANK    - Blank lines in roff input are significant.  If we treat them as
#            TEXT they may be flowed out of existence, but they almost always
#            exist in input for their effect on output; instead treat them as
#            a command ("insert a blank line"), which seems logical and gives
#            the right behaviour.
COMMENT, TEXT, COMMAND, VERBATIM, BLANK = range(5)


def tokenize(inp):
    """Tokenize an nroff page, one input line at a time.

    INP is any iterable of lines (an open file works).  Lines are
    concatenated as-is when packed into runs, so they should still carry
    their line terminators.

    Returns a list of two-element lists ``[TOKEN, text]`` where TOKEN is one
    of COMMENT, TEXT, COMMAND, VERBATIM or BLANK.  Consecutive lines with the
    same token are packed together into a single entry.
    """
    ret = []
    lasttok = None
    # Stack of verbatim-block ending commands, in the order we need to see
    # them to leave the block.  Non-empty means plain lines are VERBATIM.
    inverb = []

    for line in inp:
        if not line or line.isspace():
            tok = BLANK
        elif line.startswith(('.\\"', '\'\\"')):
            # Must be tested before the generic command check below: comment
            # lines also begin with a control character and would otherwise
            # be classified as COMMAND.
            tok = COMMENT
        elif line[0] in (".", "'"):
            tok = COMMAND
            command = line.split()[0]
            if command in VERBATIM_BLOCKS:
                # Command starts a verbatim block; remember what closes it.
                inverb.append(VERBATIM_BLOCKS[command])
            elif inverb and command == inverb[-1]:
                # Command closes the innermost open verbatim block.
                inverb.pop()
        else:
            tok = VERBATIM if inverb else TEXT

        if ret and tok == lasttok:
            ret[-1][1] += line
        else:
            ret.append([tok, line])
        lasttok = tok

    return ret
# Shared wrapper used for every paragraph.  Tabs and runs of spaces are kept
# as they are (no tab expansion, no whitespace replacement); words are never
# split on hyphens and sentence endings are not adjusted.
Wrap = textwrap.TextWrapper(width=79, expand_tabs=False,
                            replace_whitespace=False,
                            drop_whitespace=True,
                            fix_sentence_endings=False,
                            break_on_hyphens=False)


def flow_paragraph(text):
    """Re-wrap the paragraph TEXT and return the new lines as a list.

    TEXT is a run of plain text lines (newline separated).  The returned
    strings carry no line terminators.  An empty or all-whitespace TEXT
    yields an empty list.

    No line other than the first is allowed to start with a period or a
    single quote, since roff would read it as a command.  The first line is
    returned as wrapped, because there is nothing above it to borrow from.
    """
    control = (".", "'")

    # Wrap is configured with replace_whitespace=False, which would leave the
    # paragraph's own newlines embedded inside the wrapped lines.  Join the
    # source lines with spaces first so the text is genuinely reflowed.
    paragraph = ' '.join(text.splitlines())

    flowed = []
    for line in Wrap.wrap(paragraph):
        # We can't allow a non-COMMAND line to start with a period or a
        # single quote.  If wrapping produced one, pull the last word of the
        # previous line down in front of it (repeating if that word is itself
        # a problem).
        while flowed and line.startswith(control):
            prev = flowed.pop()
            head, _, word = prev.rpartition(' ')
            if head.strip():
                flowed.append(head.rstrip())
            else:
                # The previous line held a single word: move the whole line
                # down rather than leaving an empty line behind, because a
                # blank line is significant in roff.
                word = prev
            line = "%s %s" % (word, line)
        flowed.append(line)

    return flowed
def reflow(lines, outp=None):
    """Reflow an nroff document in LINES, writing the new document to OUTP.

    LINES is an iterable of ``(token, text)`` pairs.  TEXT runs are re-wrapped
    with flow_paragraph(); COMMENT, COMMAND, VERBATIM and BLANK runs are
    written out unchanged.

    OUTP is any object with a ``write`` method.  When omitted, sys.stdout is
    used; it is looked up at call time so that a redirected sys.stdout is
    honoured.

    Raises ValueError if a pair carries a token that is none of the above.
    """
    if outp is None:
        outp = sys.stdout

    for tok, text in lines:
        if tok == TEXT:
            outp.write('\n'.join(flow_paragraph(text)) + '\n')
        elif tok in (COMMENT, COMMAND, VERBATIM, BLANK):
            outp.write(text)
        else:
            raise ValueError("Unknown token value `%s'" % tok)
if __name__ == '__main__':
    import os

    if len(sys.argv) != 3:
        sys.stderr.write("Usage: rofflmao <infile> <outfile>\n")
        sys.exit(2)

    infile, outfile = sys.argv[1:3]

    # Opening the output truncates it, so writing onto the input would
    # destroy the document before it is read.  Compare resolved paths rather
    # than the raw arguments so that "page.1" vs "./page.1", or a symlink to
    # the input, is caught as well.
    if os.path.realpath(infile) == os.path.realpath(outfile):
        sys.stderr.write("Input and output must differ\n")
        sys.exit(1)

    # Debugging aid: to inspect the token stream instead of reflowing, write
    # out "%s: %s" % (tok, text) for each pair returned by tokenize(f).
    with open(infile, 'r') as f, open(outfile, 'w') as n:
        reflow(tokenize(f), outp=n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment