Last active
February 20, 2020 23:09
-
-
Save reagle/06e7f6799dbc07ba039b8451fe499725 to your computer and use it in GitHub Desktop.
Convert string to pandoc auto_identifiers default algorithm. Intended as a CopyQ clipboard function.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# | |
# (c) Copyright 2019 by Joseph Reagle | |
# Licensed under the GPLv3, see <http://www.gnu.org/licenses/gpl-3.0.html> | |
# | |
""" Convert string to pandoc auto_identifiers default algorithm. | |
Intended as a CopyQ clipboard function. | |
""" | |
import argparse # http://docs.python.org/dev/library/argparse.html | |
import doctest | |
import logging | |
import re | |
import sys | |
# Short module-level aliases for the root-logger convenience functions.
critical = logging.critical
error = logging.error
# logging.warn is a deprecated alias (since Python 3.3) that emits a
# DeprecationWarning when called; bind the supported function instead.
warn = logging.warning
info = logging.info
debug = logging.debug
excpt = logging.exception
def auto_id(s):
    """Convert a string to a pandoc auto_identifier.

    Follows the default algorithm described at
    https://pandoc.org/MANUAL.html#headings-and-sections, minus the
    formatting/footnote removal steps (clipboard text carries neither).

    Parameters:
        s: the heading text to convert.

    Returns:
        The identifier string, or "section" when nothing usable remains.

    >>> auto_id("Heading identifiers in HTML")
    'heading-identifiers-in-html'
    >>> auto_id("Maître d'hôtel")
    'maître-dhôtel'
    >>> auto_id("*Dogs*?--in *my* house?")
    'dogs--in-my-house'
    >>> auto_id("[HTML], [S5], or [RTF]?")
    'html-s5-or-rtf'
    >>> auto_id("3. Applications")
    'applications'
    >>> auto_id("33")
    'section'
    >>> auto_id("Élan vital")
    'élan-vital'
    """
    # Replace every whitespace character with a hyphen. This is done first
    # so the punctuation filter below does not delete the spaces.
    s = re.sub(r"\s", r"-", s)
    logging.info(f"{s=} ==============")

    # Remove all non-alphanumeric characters, except underscores, hyphens,
    # and periods.
    s = re.sub(r"[^\w\-\.]", r"", s, flags=re.UNICODE)
    logging.info(f"{s=}")

    # Convert all alphabetic characters to lowercase.
    s = s.lower()
    logging.info(f"{s=}")

    # Remove everything up to the first letter or underscore (identifiers
    # may not begin with a number or punctuation mark). str.isalpha() is
    # Unicode-aware, so a leading non-ASCII letter such as "é" is kept
    # rather than stripped as an ASCII-only character class would do.
    start = next(
        (i for i, ch in enumerate(s) if ch.isalpha() or ch == "_"),
        len(s),
    )
    s = s[start:]
    logging.info(f"{s=}")

    # If nothing is left after this, use the identifier "section".
    if not s:
        s = "section"
    logging.info(f"{s=}")
    return s
def main(argv):
    """Process arguments and execute.

    Parameters:
        argv: list of command-line arguments, excluding the program name
            (e.g. ``sys.argv[1:]``).

    Either runs the module doctests (``--test``) or joins the positional
    TEXT arguments with spaces, converts the result with ``auto_id`` and
    prints it.
    """
    arg_parser = argparse.ArgumentParser(
        description="Convert text to pandoc auto_identifiers"
    )

    # positional arguments
    arg_parser.add_argument("text", nargs="*", metavar="TEXT")
    arg_parser.add_argument(
        "-T", "--test", action="store_true", default=False, help="Test"
    )
    arg_parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        help="log to file %(prog)s.log",
    )
    arg_parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (specify multiple times for more)",
    )
    arg_parser.add_argument(
        "--version",
        action="version",
        version=f"0.1 using Python {sys.version}",
    )
    # Parse the arguments handed to us rather than implicitly re-reading
    # sys.argv, so main() can be driven programmatically.
    args = arg_parser.parse_args(argv)

    log_level = 100  # default: above CRITICAL, i.e. effectively silent
    if args.verbose == 1:
        log_level = logging.CRITICAL  # 50
    elif args.verbose == 2:
        log_level = logging.INFO  # 20
    elif args.verbose >= 3:
        log_level = logging.DEBUG  # 10
    LOG_FORMAT = "%(levelno)s %(funcName).5s: %(message)s"
    if args.log_to_file:
        # Configure logging BEFORE emitting any record: a module-level
        # logging call on an unconfigured root logger installs a default
        # stderr handler, after which basicConfig(filename=...) is a no-op.
        logging.basicConfig(
            # Match the file name promised by the --log-to-file help text.
            filename=f"{arg_parser.prog}.log",
            filemode="w",
            level=log_level,
            format=LOG_FORMAT,
        )
        logging.info("logging to file")
    else:
        logging.basicConfig(level=log_level, format=LOG_FORMAT)

    if args.test:
        doctest.testmod()
    else:
        text = " ".join(args.text)
        result = auto_id(text)
        logging.info(result)
        print(result)
if "__main__" == __name__: | |
main(sys.argv[1:]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment