Skip to content

Instantly share code, notes, and snippets.

@reagle
Last active July 13, 2021 12:00
Show Gist options
  • Save reagle/dad4e59df7b73ba935556ac9638c1703 to your computer and use it in GitHub Desktop.
Save reagle/dad4e59df7b73ba935556ac9638c1703 to your computer and use it in GitHub Desktop.
Wrap text, including semantically by adding breaks at terminal punctuation.
#!/usr/bin/env python2.7
"""Wrap text, including semantically by adding breaks at
terminal punctuation."""
# Using python 2.7 so portions can be used in Sublime Text 3 plugin
import argparse # http://docs.python.org/dev/library/argparse.html
import logging
import re
import sys
import textwrap
# Module-level shorthands for the logging module's convenience functions, so
# the rest of the file can call info(...), debug(...), etc. directly.
debug = logging.debug
info = logging.info
# logging.warn is a deprecated alias that was removed in Python 3.13;
# logging.warning is available on Python 2.7 and on every Python 3 release.
warn = logging.warning
error = logging.error
critical = logging.critical
exception = logging.exception
# Pattern for a sentence ending that is followed by whitespace.
#   group 1: the tail of the sentence -- two or more lowercase ASCII letters,
#            or one or more digits (e.g. a page or chapter number)
#   group 2: the terminal punctuation -- one of . ? ! } : on its own, or one
#            of . ? ! combined with a closing paren, bracket, or double quote
#   group 3: the single whitespace character that follows the punctuation
# The negative lookahead rejects a match when that whitespace is immediately
# followed by a digit.
# NOTE: the in-pattern comment "quote quote" on the `"\.` alternative is a
# typo; that alternative matches a double quote followed by a period.
SEMANTIC_BREAK_RE = re.compile(
r"""
( # end of sentence includes...
[a-z]{2,}| # end of word
[0-9]{1,} # end of a page or chapter number
)
( # terminal punctuation
\.
|\?
|\!
|\}
|:
|\.\) # period paren
|\)\. # paren period
|\.\] # period bracket
|\]\. # bracket period
|\." # period quote
|"\. # quote quote
|"\)\. # quote paren period
|\?\) # question paren
|\)\? # paren question
|\?\] # question bracket
|\]\? # bracket question
|\?" # question quote
|"\? # quote question
|\!\) # exclaim paren
|\)\! # paren exclaim
|\!\] # exclaim bracket
|\]\! # bracket exclaim
|\!" # exclaim quote
|"\! # quote exclaim
)
(\s) # a whitespace
(?!\d) # negative lookahead for digit
""",
re.VERBOSE,
)
QUOTES_RE = re.compile(
r"""
(^[> ]+)? # start quotes
(.*) # rest of line
""",
re.VERBOSE,
)
def quoted_wrap(content, width):
"""wrap quoted lines, preserving quotes; collapse empty lines"""
wrapper = textwrap.TextWrapper(width=width)
quote_level = quote_level_prev = None
buffer = [
"",
] # hold chunks of similar quote_level
new_content = [] # hold rebuilt content
for line in content.split("\n"):
# quotes = text = ""
info("---")
info("line = '%s'" % line)
quotes, text = QUOTES_RE.match(line).groups()
quote_level = quotes.count(">") if quotes else 0
if quote_level_prev is None: # None on first iteration
info("* no quote_level_prev")
quote_level_prev = quote_level
info("text = '%s'" % (text))
info(
"quote_level = %s quote_level_prev = %s"
% (quote_level, quote_level_prev)
)
# if similar: accumulate lines of paragraph at same quote level
if quote_level == quote_level_prev and text:
buffer.append(text)
info("buffer = %s" % (buffer))
# else: add wrapped lines to new_content, start new buffer
else:
info("* change in quote level or new paragraph")
info("buffer = %s" % (buffer))
prefix = ">" * quote_level_prev + " " if quote_level_prev else ""
wrapper.initial_indent = prefix
wrapper.subsequent_indent = prefix
buffer_joined = " ".join(buffer).strip()
info("buffer_joined = '%s'" % (buffer_joined))
new_content.append(wrapper.fill(buffer_joined))
if not text: # new paragraph
info("* new paragraph")
new_content.append(prefix)
quote_level_prev = quote_level
buffer = [text]
info("NEW_CONTENT = %s" % (new_content))
new_content = "\n".join(new_content) # TODO: remove first empty
new_content = re.sub(r"\n\s*\n", "\n\n", new_content) # collapse empty
return new_content
def semantic_wrap(text):
    """Insert a line break after each sentence ending ("semantic" wrapping).

    *text* is split into paragraphs on blank lines.  In a paragraph that
    starts with ">", every physical line is handled on its own: its leading
    quote run (as matched by QUOTES_RE) is removed, a newline plus that same
    quote run is inserted after each SEMANTIC_BREAK_RE match, and the quote
    run is put back in front of the line.  Other paragraphs get a plain
    newline inserted after each match.

    Known limitation: a paragraph that starts unquoted but continues with
    quoted lines (e.g., the beginning of an email: "On DATE, NAME wrote:")
    is treated as unquoted, so breaks inserted inside its quoted lines do
    not repeat the quote prefix.

    Args:
        text: the text to process, as one string.

    Returns:
        The processed paragraphs, each followed by a newline, joined with
        newlines.
    """
    new_content = []
    info("START")
    for paragraph in text.split("\n\n"):
        info("line = '%s'", paragraph)
        if paragraph.startswith(">"):
            info("is quoted")
            wrapped_lines = []
            # QUOTES_RE cannot match across a newline, so each physical line
            # is processed separately; matching the whole paragraph would
            # keep only its first line.
            for line in paragraph.split("\n"):
                quotes, body = QUOTES_RE.match(line).groups()
                quotes = quotes or ""
                # quotes holds only ">" and " " characters, so it is safe to
                # embed in the replacement template.
                broken = SEMANTIC_BREAK_RE.sub(
                    r"""\1\2\3\n%s""" % quotes, body
                )
                info("wrapped_line = '%s'", broken)
                info("quotes = '%s'", quotes)
                wrapped_lines.append(quotes + broken)
            wrapped_line = "\n".join(wrapped_lines) + "\n"
        else:
            wrapped_line = (
                SEMANTIC_BREAK_RE.sub(r"""\1\2\3\n""", paragraph) + "\n"
            )
        new_content.append(wrapped_line)
    return "\n".join(new_content)
def main(argv):
    """Parse command-line arguments and configure logging.

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        The argparse.Namespace with attributes files (a one-element list),
        semantic, wrap, out_filename, log_to_file, and verbose.
    """
    log_filename = "wrap.log"

    arg_parser = argparse.ArgumentParser(
        description="a couple different types of line wrappers"
    )

    # positional arguments
    arg_parser.add_argument("files", nargs=1, metavar="FILE")

    # optional arguments
    arg_parser.add_argument(
        "-s",
        "--semantic",
        action="store_true",
        default=False,
        help="semantic wrap (at sentence endings)",
    )
    arg_parser.add_argument(
        "-w",
        "--wrap",
        type=int,
        default=72,
        help="wrap width (default: %(default)s)",
    )
    arg_parser.add_argument(
        "-o",
        "--out-filename",
        help="output results to filename",
        metavar="FILE",
    )
    arg_parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        # Name the file that is actually written below, rather than one
        # derived from the program name.
        help="log to file %s" % log_filename,
    )
    arg_parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (specify multiple times for more)",
    )
    arg_parser.add_argument("--version", action="version", version="TBD")
    args = arg_parser.parse_args(argv)

    # Without -V the threshold sits above logging.CRITICAL (50), so nothing
    # is logged.
    log_level = 100  # default
    if args.verbose >= 3:
        log_level = logging.DEBUG  # 10
    elif args.verbose == 2:
        log_level = logging.INFO  # 20
    elif args.verbose == 1:
        log_level = logging.ERROR  # 40

    LOG_FORMAT = "%(levelno)s %(funcName).5s: %(message)s"
    if args.log_to_file:
        logging.basicConfig(
            filename=log_filename,
            filemode="w",
            level=log_level,
            format=LOG_FORMAT,
        )
    else:
        logging.basicConfig(level=log_level, format=LOG_FORMAT)

    return args
if "__main__" == __name__:
    # Script entry point: wrap the file named on the command line.
    args = main(sys.argv[1:])
    # Use a context manager so the input file is closed as soon as it is read.
    with open(args.files[0]) as source_file:
        content = source_file.read()
    if args.semantic:
        result = semantic_wrap(content)
    else:
        result = quoted_wrap(content, args.wrap)
    # -o/--out-filename sends the result to a file; the default is stdout.
    if args.out_filename:
        with open(args.out_filename, "w") as out_file:
            # Match print(), which terminates its output with a newline.
            out_file.write(result + "\n")
    else:
        print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment