Skip to content

Instantly share code, notes, and snippets.

@sideeffect42
Last active April 29, 2023 08:17
Show Gist options
  • Save sideeffect42/b1865f5fe4b1e6b907cb4b50191cc3f0 to your computer and use it in GitHub Desktop.
Save sideeffect42/b1865f5fe4b1e6b907cb4b50191cc3f0 to your computer and use it in GitHub Desktop.
Lint skonfig types' man.rst files by parsing and re-serialising
#!/usr/bin/env python3
import itertools
import os
import re
import sys
import textwrap
import docutils
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import docutils.frontend
class ManStruct:
    """Structured representation of one cdist type manual page.

    An instance starts out empty (apart from the type name), is filled in
    attribute by attribute, and is turned back into reStructuredText by
    :meth:`to_rst`.

    Attributes:
        type_name: Type name; appended verbatim to ``cdist-type`` to form
            the page title.
        abstract: One-line summary used in the NAME section, or ``None``.
        description: Body of the DESCRIPTION section, or ``None``.
        parameters: Maps a parameter name to a dict with the keys
            ``"deprecated"`` (bool), ``"multiple"`` (bool), ``"type"``
            (str, upper-cased for the section title) and ``"description"``
            (str).
        messages: Maps a message to its description.
        examples: Body of the EXAMPLES section, or ``None``.
        see_also: Entries of the SEE ALSO bullet list.
        authors: Entries of the AUTHORS bullet list.
        copying: Body of the COPYING section, or ``None``.
        custom_sections: Maps the title of any other section to its body.
    """

    # Indentation applied to definition bodies (parameter and message
    # descriptions).
    _BODY_INDENT = "   "

    # Continuation lines of a bullet item are aligned with the text that
    # follows the "* " marker.
    _BULLET_INDENT = "  "

    # Parameter sections that are emitted first, in this order; any other
    # parameter section (e.g. deprecated ones) follows alphabetically.
    _PARAMETER_SECTION_ORDER = (
        "REQUIRED MULTIPLE PARAMETERS",
        "REQUIRED PARAMETERS",
        "OPTIONAL MULTIPLE PARAMETERS",
        "OPTIONAL PARAMETERS",
        "BOOLEAN PARAMETERS",
    )

    def __init__(self, type_name):
        self.type_name = type_name
        self.abstract = None
        self.description = None
        self.parameters = {}
        self.messages = {}
        self.examples = None
        self.see_also = []
        self.authors = []
        self.copying = None
        self.custom_sections = {}

    @classmethod
    def _bullet(cls, text):
        """Format *text* as one bullet item ("* text").

        Only the lines after the first are indented, so a multi-line entry
        stays part of the same list item.
        """
        (head, newline, tail) = text.partition("\n")
        return "* " + head + newline + textwrap.indent(
            tail, cls._BULLET_INDENT)

    def to_rst(self):
        """Serialise the manual page to reStructuredText.

        Sections are emitted in a fixed order: NAME, DESCRIPTION, the
        parameter sections, MESSAGES, EXAMPLES, custom sections (sorted by
        title), SEE ALSO, AUTHORS and COPYING.  Apart from NAME, a section
        is only emitted when it has content.

        Returns:
            The complete document as a string without a trailing newline.
        """
        title = "cdist-type%s" % self.type_name

        # Do not print the literal string "None" when no abstract was set.
        if self.abstract:
            name_body = "%s - %s" % (title, self.abstract)
        else:
            name_body = title
        sections = [("NAME", name_body)]

        if self.description:
            sections.append(("DESCRIPTION", self.description))

        # Group the parameters by the title of the section they belong to,
        # e.g. "DEPRECATED OPTIONAL MULTIPLE PARAMETERS".
        grouped = {}
        for (name, spec) in sorted(self.parameters.items()):
            words = []
            if spec["deprecated"]:
                words.append("DEPRECATED")
            words.append(spec["type"].upper())
            if spec["multiple"]:
                words.append("MULTIPLE")
            words.append("PARAMETERS")
            grouped.setdefault(" ".join(words), []).append(
                "%s\n%s\n" % (
                    name,
                    textwrap.indent(spec["description"], self._BODY_INDENT)))

        headings = [
            h for h in self._PARAMETER_SECTION_ORDER if h in grouped]
        headings += sorted(
            h for h in grouped if h not in self._PARAMETER_SECTION_ORDER)
        for heading in headings:
            sections.append((heading, "".join(grouped[heading]).rstrip()))

        if self.messages:
            sections.append(("MESSAGES", "\n".join(
                "%s\n%s" % (msg, textwrap.indent(desc, self._BODY_INDENT))
                for (msg, desc) in self.messages.items())))

        if self.examples:
            sections.append(("EXAMPLES", self.examples))

        if self.custom_sections:
            sections += sorted(self.custom_sections.items())

        if self.see_also:
            sections.append(
                ("SEE ALSO", "\n".join(map(self._bullet, self.see_also))))

        if self.authors:
            sections.append(
                ("AUTHORS", "\n".join(map(self._bullet, self.authors))))

        if self.copying:
            sections.append(("COPYING", self.copying))

        # The page title carries the "(7)" suffix, hence the 3 extra
        # characters of underline.
        return "%s\n%s\n\n%s" % (
            title + "(7)",
            (len(title) + 3) * "=",
            "\n\n\n".join(
                "%s\n%s\n%s" % (heading, len(heading) * "-", body)
                for (heading, body) in sections
            )
        )
def emit_warning(filename, fmtstr, *args):
    """Print a warning about *filename* to standard error.

    Args:
        filename: Name of the file the warning refers to; printed as a
            ``"<filename>: "`` prefix.
        fmtstr: printf-style format string of the message.
        *args: Values interpolated into *fmtstr*.
    """
    # The prefix is part of the format string so that the file name itself
    # is passed as a value and never interpreted as a format.
    message = ("%s: " + fmtstr) % (filename, *args)
    print(message, file=sys.stderr)
def _hanging_indent(text, prefix):
    """Prefix every line of *text* except the first with *prefix*."""
    (head, newline, tail) = text.partition("\n")
    return head + newline + textwrap.indent(tail, prefix)


def rec_rawsource(node):
    """Rebuild reStructuredText source for *node* and its descendants.

    A node with a non-empty ``rawsource`` contributes that text verbatim;
    otherwise the text is assembled recursively from its children.  The
    collected text is then wrapped according to the node type (bullet
    list, literal block, paragraph, ...).

    Args:
        node: A docutils node.  It is not modified.

    Returns:
        The reconstructed source text as a string.
    """
    if node.rawsource:
        content = [node.rawsource]
    else:
        content = [rec_rawsource(child) for child in node.children]

    if isinstance(node, docutils.nodes.bullet_list):
        # One "* " item per child; continuation lines are aligned with
        # the item text.
        return "\n".join(
            "* " + _hanging_indent(item, "  ") for item in content)

    if isinstance(node, docutils.nodes.literal_block):
        classes = node.attributes["classes"]
        directive = ""
        if "code" in classes:
            # Work on a filtered copy: the node's class list is left
            # untouched, and a code block without a language no longer
            # raises IndexError.
            directive = " code-block::"
            languages = [cls for cls in classes if cls != "code"]
            if languages:
                directive += " " + languages[0]
        # NOTE(review): a literal block without the "code" class is
        # emitted after a bare ".." line -- confirm this is the intended
        # serialisation.
        return "\n..%s\n\n%s" % (
            directive,
            textwrap.indent("".join(content), "   "))

    if isinstance(node, docutils.nodes.paragraph):
        return "".join(content) + "\n\n"

    if isinstance(node, docutils.nodes.block_quote):
        return textwrap.indent("".join(content), "   ")

    if isinstance(node, docutils.nodes.definition_list):
        return "\n".join(content) + "\n"

    if isinstance(node, docutils.nodes.definition_list_item):
        # The term stays at column 0, the definition below it is indented.
        return _hanging_indent("".join(content), "   ")

    if isinstance(node, (docutils.nodes.section,
                         docutils.nodes.definition,
                         docutils.nodes.list_item,
                         docutils.nodes.term,
                         docutils.nodes.line)):
        return "".join(content).rstrip()

    # Unhandled node type: report it on stderr so that the diagnostic does
    # not end up inside the document printed on stdout.
    print(type(node), content, file=sys.stderr)
    return "".join(content)
def process_section(section, filename=None):
    """Split a section node into its upper-cased title and the node itself.

    The first child of *section* is removed from the node and used as the
    title, so afterwards the node only holds the section body.

    Args:
        section: The node to process; expected to be a
            ``docutils.nodes.section``.  A warning is emitted for any
            other node type, but processing continues.
        filename: Name to report in warnings.  When omitted, the source
            recorded on the node (or on its document) is used, so the
            function does not rely on a module-level ``filename``
            variable being defined.

    Returns:
        A ``(title, section)`` tuple.  ``title`` is the empty string when
        the node has no children to take a title from.
    """
    if not isinstance(section, docutils.nodes.section):
        if filename is None:
            document = getattr(section, "document", None)
            filename = (
                getattr(section, "source", None)
                or (document.get("source") if document is not None else None)
                or "<unknown>")
        emit_warning(
            filename,
            "unexpected non-section element: %r",
            section)

    if not section.children:
        # Nothing to pop a title from (possible for non-section nodes).
        return ("", section)

    title = section.children.pop(0)
    return (title.astext().upper(), section)
def process_dl(node):
    """Extract the (term, definition) pairs of a definition list.

    Args:
        node: A ``docutils.nodes.definition_list``.  It is not modified,
            so the same list can be processed more than once.

    Returns:
        A list of ``(term, definition)`` string tuples, one per list item,
        in document order.  Both strings are reconstructed with
        ``rec_rawsource``; the definition is the concatenation of all
        children that follow the term.

    Raises:
        ValueError: If *node* is not a definition list.
    """
    if not isinstance(node, docutils.nodes.definition_list):
        raise ValueError("node is not a dl")

    pairs = []
    for item in node.children:
        # The first child is the term, everything after it belongs to the
        # definition.
        term = item.children[0]
        body = item.children[1:]
        pairs.append(
            (rec_rawsource(term), "".join(map(rec_rawsource, body))))
    return pairs
# Matches the title of a parameter section, e.g.
# "DEPRECATED OPTIONAL MULTIPLE PARAMETERS".
# Groups: 1 = "DEPRECATED " (optional), 2 = parameter type,
# 3 = " MULTIPLE" (optional).
_PARAMETER_TITLE_RE = re.compile(
    r"^\s*(DEPRECATED )?(BOOLEAN|REQUIRED|OPTIONAL)( MULTIPLE)? PARAMETERS\s*$")

# Matches the body of a section that only says "None" / "None.".
_NONE_PLACEHOLDER_RE = re.compile(r"^none\.?$", flags=re.IGNORECASE)


def _collect_list_entries(section):
    """Return the entries of a list-like section (AUTHORS, SEE ALSO).

    Bullet lists and line blocks contribute one entry per child; any
    other element is split at commas and newlines.  Empty entries (e.g.
    produced by a comma directly followed by a newline) are dropped.
    """
    entries = []
    for child in section.children:
        if isinstance(child, (docutils.nodes.bullet_list,
                              docutils.nodes.line_block)):
            entries += [
                rec_rawsource(item).rstrip() for item in child.children]
        else:
            # HACK: comma/newline split
            entries += re.split(
                r", *|\n", rec_rawsource(child).rstrip().rstrip("."))
    return [entry for entry in entries if entry.strip()]


def _iter_definitions(section, title, filename):
    """Yield the (term, definition) pairs of all definition lists in *section*.

    A child that merely says "None" is skipped silently; any other child
    that is not a definition list triggers a warning.
    """
    for child in section.children:
        if isinstance(child, docutils.nodes.definition_list):
            yield from process_dl(child)
        elif _NONE_PLACEHOLDER_RE.match(child.astext().strip()):
            pass
        else:
            emit_warning(
                filename, "non definition-list in %s section", title)


def parse_rst(filename):
    """Parse one man.rst file and print its re-serialised form to stdout.

    Problems found on the way are reported through ``emit_warning``.  A
    file that is not named ``man.rst`` or that contains no content is
    skipped after a warning.

    Args:
        filename: Path of the file to lint.  The name of its parent
            directory is used as the type name.
    """
    (path, manrst) = os.path.split(filename)
    if manrst != "man.rst":
        emit_warning(filename, "file name is not man.rst, skipping file.")
        # Actually skip the file, as the warning announces.
        return
    type_name = os.path.basename(path)

    if docutils.__version_info__ >= (0, 19):
        settings = docutils.frontend.get_default_settings(
            docutils.parsers.rst.Parser())
    else:
        settings = docutils.frontend.OptionParser(
            components=(docutils.parsers.rst.Parser,)).get_default_values()

    # Read with an explicit encoding so the result does not depend on the
    # locale of the machine running the linter.
    with open(filename, "r", encoding="utf-8") as fh:
        document = docutils.utils.new_document(fh.name, settings=settings)
        docutils.parsers.rst.Parser().parse(fh.read(), document)

    if not document.children:
        emit_warning(filename, "document is empty, skipping file.")
        return

    if len(document.children) > 1:
        emit_warning(
            filename,
            "found %u sections in the document, will only process the first",
            len(document.children))

    top_section = document.children[0]

    # man page title
    if top_section.children and isinstance(
            top_section.children[0], docutils.nodes.title):
        page_title = top_section.children.pop(0)
        if page_title.astext() != ("cdist-type%s(7)" % (type_name)):
            emit_warning(
                filename,
                "man page title does not match expected format")

    man = ManStruct(type_name)

    for child in top_section.children:
        if not isinstance(child, docutils.nodes.section):
            # Only sections have a title to dispatch on; anything else
            # is reported and left out.
            emit_warning(
                filename,
                "unexpected non-section element: %r",
                child)
            continue

        (title, elt) = process_section(child)
        param_match = _PARAMETER_TITLE_RE.match(title)

        if title == "NAME":
            # partition() instead of split(): a NAME section without the
            # " - " separator is reported instead of raising ValueError.
            (name, sep, abstract) = rec_rawsource(elt).partition(" - ")
            if not sep or name != ("cdist-type%s" % (type_name)):
                emit_warning(
                    filename,
                    "NAME section does not match expected format/value")
            man.abstract = abstract if sep else name
        elif title in ("COPYING", "DESCRIPTION", "EXAMPLES"):
            setattr(man, title.lower(), rec_rawsource(elt))
        elif title in ("AUTHOR", "AUTHORS"):
            man.authors += _collect_list_entries(elt)
        elif title == "SEE ALSO":
            man.see_also += _collect_list_entries(elt)
        elif title == "MESSAGES":
            for (k, v) in _iter_definitions(elt, title, filename):
                if k in man.messages:
                    emit_warning(
                        filename,
                        "multiple definitions of message %s", k)
                man.messages[k] = v
        elif param_match:  # PARAMETER
            for (k, v) in _iter_definitions(elt, title, filename):
                if k in man.parameters:
                    emit_warning(
                        filename,
                        "multiple definitions of parameter %s", k)
                man.parameters[k] = {
                    "deprecated": bool(param_match.group(1)),
                    "multiple": bool(param_match.group(3)),
                    "type": param_match.group(2).lower(),
                    "description": v,
                }
        else:
            emit_warning(filename, "found custom section: %s", title)
            man.custom_sections[title] = rec_rawsource(elt)

    print(man.to_rst())
if __name__ == "__main__":
    # Lint every file named on the command line; the re-serialised pages
    # go to stdout, warnings to stderr.
    #
    # Keep the loop variable named ``filename``: at this level it is a
    # module global, and warning code elsewhere in this file may look it
    # up by that name.
    for filename in sys.argv[1:]:
        parse_rst(filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment