Skip to content

Instantly share code, notes, and snippets.

@reagle
Last active February 20, 2020 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reagle/06e7f6799dbc07ba039b8451fe499725 to your computer and use it in GitHub Desktop.
Save reagle/06e7f6799dbc07ba039b8451fe499725 to your computer and use it in GitHub Desktop.
Convert a string to an identifier using pandoc's default auto_identifiers algorithm. Intended as a CopyQ clipboard function.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# (c) Copyright 2019 by Joseph Reagle
# Licensed under the GPLv3, see <http://www.gnu.org/licenses/gpl-3.0.html>
#
""" Convert string to pandoc auto_identifiers default algorithm.
Intended as a CopyQ clipboard function.
"""
import argparse # http://docs.python.org/dev/library/argparse.html
import doctest
import logging
import re
import sys
# Module-level shorthands for the root logger's convenience functions.
critical = logging.critical
error = logging.error
# logging.warn is a deprecated alias; bind the supported logging.warning.
warn = logging.warning
info = logging.info
debug = logging.debug
excpt = logging.exception
def auto_id(s):
    """Convert a heading string to a pandoc auto_identifier.

    Applies the default algorithm described at
    https://pandoc.org/MANUAL.html#headings-and-sections to plain text.
    The "remove formatting/links" and "remove footnotes" steps are skipped
    because such markup is not expected in clipboard text.

    Args:
        s: The heading text to convert.

    Returns:
        The identifier, or ``"section"`` when nothing is left after
        clean-up.

    >>> auto_id("Heading identifiers in HTML")
    'heading-identifiers-in-html'
    >>> auto_id("Maître d'hôtel")
    'maître-dhôtel'
    >>> auto_id("*Dogs*?--in *my* house?")
    'dogs--in-my-house'
    >>> auto_id("[HTML], [S5], or [RTF]?")
    'html-s5-or-rtf'
    >>> auto_id("3. Applications")
    'applications'
    >>> auto_id("33")
    'section'
    >>> auto_id("École normale")
    'école-normale'
    """
    # Replace all spaces and newlines with hyphens. [jr: moved up]
    s = re.sub(r"\s", "-", s)
    logging.info("s=%r ==============", s)

    # Remove all non-alphanumeric characters, except underscores, hyphens,
    # and periods. (\w is Unicode-aware for str patterns by default.)
    s = re.sub(r"[^\w\-.]", "", s)
    logging.info("s=%r", s)

    # Convert all alphabetic characters to lowercase.
    s = s.lower()
    logging.info("s=%r", s)

    # Remove everything up to the first letter (identifiers may not begin
    # with a number or punctuation mark). [jr: first letter *or* underscore]
    # str.isalpha() is used instead of an ASCII character class so that a
    # leading non-ASCII letter (e.g. "é") is kept rather than stripped.
    start = next(
        (i for i, ch in enumerate(s) if ch.isalpha() or ch == "_"),
        len(s),
    )
    s = s[start:]
    logging.info("s=%r", s)

    # If nothing is left after this, use the identifier "section".
    if not s:
        s = "section"
    logging.info("s=%r", s)
    return s
def main(argv):
    """Process command-line arguments and execute.

    Parses the options, configures logging, and then either runs the
    module's doctests (``--test``) or prints the auto_identifier for the
    TEXT arguments joined by single spaces.

    Args:
        argv: Argument strings to parse, without the program name
            (e.g. ``sys.argv[1:]``). ``None`` makes argparse fall back to
            ``sys.argv[1:]``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Convert text to pandoc auto_identifiers"
    )

    # positional arguments
    arg_parser.add_argument("text", nargs="*", metavar="TEXT")
    arg_parser.add_argument(
        "-T", "--test", action="store_true", default=False, help="Test"
    )
    arg_parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        help="log to file %(prog)s.log",
    )
    arg_parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="Increase verbosity (specify multiple times for more)",
    )
    arg_parser.add_argument(
        "--version",
        action="version",
        version=f"0.1 using Python {sys.version}",
    )
    # Parse the arguments handed to main() rather than silently re-reading
    # sys.argv, so the function honours its parameter.
    args = arg_parser.parse_args(argv)

    # Level 100 is above CRITICAL (50), so nothing is logged by default.
    log_level = 100
    if args.verbose == 1:
        log_level = logging.CRITICAL  # 50
    elif args.verbose == 2:
        log_level = logging.INFO  # 20
    elif args.verbose >= 3:
        log_level = logging.DEBUG  # 10

    log_format = "%(levelno)s %(funcName).5s: %(message)s"
    if args.log_to_file:
        # Configure first: emitting a record through the module-level
        # logging functions before basicConfig() installs a default stderr
        # handler, after which this basicConfig() call would do nothing.
        logging.basicConfig(
            # Matches the name advertised in the --log-to-file help text.
            filename=f"{arg_parser.prog}.log",
            filemode="w",
            level=log_level,
            format=log_format,
        )
        logging.info("logging to file")
    else:
        logging.basicConfig(level=log_level, format=log_format)

    if args.test:
        doctest.testmod()
    else:
        text = " ".join(args.text)
        result = auto_id(text)
        logging.info(result)
        print(result)
# Run the command-line interface only when executed as a script,
# passing along the arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment