Last active
April 29, 2023 08:17
-
-
Save sideeffect42/b1865f5fe4b1e6b907cb4b50191cc3f0 to your computer and use it in GitHub Desktop.
Lint skonfig types' man.rst files by parsing and re-serialising
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import itertools | |
import os | |
import re | |
import sys | |
import textwrap | |
import docutils | |
import docutils.nodes | |
import docutils.parsers.rst | |
import docutils.utils | |
import docutils.frontend | |
class ManStruct:
    """In-memory model of one type's manual page, serialisable back to reST.

    The attributes are filled in by the parsing code and then rendered in a
    fixed section order by :meth:`to_rst`.
    """

    # Parameter sections that are emitted first, in exactly this order.  Any
    # other parameter section (e.g. the DEPRECATED variants) follows sorted
    # alphabetically.
    _PARAMETER_SECTION_ORDER = (
        "REQUIRED MULTIPLE PARAMETERS",
        "REQUIRED PARAMETERS",
        "OPTIONAL MULTIPLE PARAMETERS",
        "OPTIONAL PARAMETERS",
        "BOOLEAN PARAMETERS",
    )

    # Indentation put in front of every definition body line.
    # NOTE(review): one column, as in the code this replaces.  Any indentation
    # is enough for a reST definition body; widen it here if a different
    # house style is wanted.
    _DEFINITION_INDENT = " "

    def __init__(self, type_name):
        self.type_name = type_name
        # One-line summary shown after "<title> - " in the NAME section.
        self.abstract = None
        # Body text of the DESCRIPTION section.
        self.description = None
        # name -> {"deprecated": bool, "multiple": bool, "type": str,
        #          "description": str}
        self.parameters = {}
        # message -> description text
        self.messages = {}
        # Body text of the EXAMPLES section.
        self.examples = None
        # One string per bullet of the SEE ALSO / AUTHORS sections.
        self.see_also = []
        self.authors = []
        # Body text of the COPYING section.
        self.copying = None
        # title -> body text for every section not modelled above.
        self.custom_sections = {}

    @staticmethod
    def _bullet(text):
        """Render *text* as one reST bullet item.

        The first line gets the "* " marker; continuation lines are aligned
        with the text after the marker so they stay part of the same item.
        """
        head, newline, tail = text.partition("\n")
        return "* " + head + newline + textwrap.indent(tail, "  ")

    @classmethod
    def _definition(cls, term, body):
        """Render one definition-list entry: the term, then the indented body."""
        return "%s\n%s" % (term, textwrap.indent(body, cls._DEFINITION_INDENT))

    def to_rst(self):
        """Return the whole manual page as reST text (no trailing newline)."""
        title = "cdist-type%s" % self.type_name

        # Without an abstract, emit just the title instead of "title - None".
        if self.abstract:
            name_body = "%s - %s" % (title, self.abstract)
        else:
            name_body = title
        sections = [("NAME", name_body)]

        if self.description:
            sections.append(("DESCRIPTION", self.description))

        # Group the parameters by the section title derived from their flags.
        grouped = {}
        for (name, spec) in sorted(self.parameters.items()):
            words = []
            if spec["deprecated"]:
                words.append("DEPRECATED")
            words.append(spec["type"].upper())
            if spec["multiple"]:
                words.append("MULTIPLE")
            words.append("PARAMETERS")
            grouped.setdefault(" ".join(words), []).append(
                self._definition(name, spec["description"]))

        ordered_titles = [
            t for t in self._PARAMETER_SECTION_ORDER if t in grouped]
        ordered_titles += sorted(
            t for t in grouped if t not in self._PARAMETER_SECTION_ORDER)
        for section_title in ordered_titles:
            sections.append(
                (section_title, "\n".join(grouped[section_title]).rstrip()))

        if self.messages:
            sections.append(("MESSAGES", "\n".join(
                self._definition(msg, desc)
                for (msg, desc) in self.messages.items())))
        if self.examples:
            sections.append(("EXAMPLES", self.examples))
        if self.custom_sections:
            sections += sorted(self.custom_sections.items())
        if self.see_also:
            sections.append(
                ("SEE ALSO", "\n".join(map(self._bullet, self.see_also))))
        if self.authors:
            sections.append(
                ("AUTHORS", "\n".join(map(self._bullet, self.authors))))
        if self.copying:
            sections.append(("COPYING", self.copying))

        # The document title carries the man section "(7)", hence the three
        # extra underline characters.
        return "%s\n%s\n\n%s" % (
            title + "(7)",
            (len(title) + 3) * "=",
            "\n\n\n".join(
                "%s\n%s\n%s" % (t, len(t) * "-", b)
                for (t, b) in sections
            )
        )
def emit_warning(filename, fmtstr, *args):
    """Write a warning to stderr, prefixed with the file it is about.

    *fmtstr* is a %-style format string that is filled in with *args*.
    """
    message = fmtstr % args
    print("%s: %s" % (filename, message), file=sys.stderr)
def rec_rawsource(node):
    """Rebuild reST source text for *node* from the raw source of its subtree.

    A node's own ``rawsource`` is used when it is non-empty; otherwise the
    text is assembled recursively from the node's children.  The result is
    then shaped according to the node type (bullet list, literal block,
    paragraph, ...).

    The node is not modified.
    """
    # getattr: be tolerant of node classes without a rawsource attribute.
    raw = getattr(node, "rawsource", "")
    if raw:
        content = [raw]
    elif isinstance(node, docutils.nodes.Text):
        # Text leaves have no children to recurse into; use their text so it
        # is not silently dropped.
        return node.astext()
    else:
        content = [rec_rawsource(child) for child in node.children]

    def hanging(text, first_prefix, rest_prefix):
        # Prefix the first line with first_prefix and indent all following
        # lines with rest_prefix (blank lines are left untouched).
        head, newline, tail = text.partition("\n")
        return first_prefix + head + newline + textwrap.indent(
            tail, rest_prefix)

    if isinstance(node, docutils.nodes.bullet_list):
        # "* " marker on the first line, continuation lines aligned below
        # the item text.
        return "\n".join(hanging(item, "* ", "  ") for item in content)
    elif isinstance(node, docutils.nodes.literal_block):
        classes = node.attributes["classes"]
        if "code" in classes:
            # The language is the first class other than the "code" marker.
            # A code block without a language gets a bare directive instead
            # of raising IndexError.
            languages = [c for c in classes if c != "code"]
            directive = " code-block::"
            if languages:
                directive += " " + languages[0]
        else:
            # NOTE(review): for a literal block without the "code" class this
            # emits a bare ".." line before the indented text; confirm that
            # this is the intended serialisation.
            directive = ""
        return ("\n..%s\n\n%s" % (
            directive,
            textwrap.indent("".join(content), " ")))
    elif isinstance(node, docutils.nodes.paragraph):
        return "".join(content) + "\n\n"
    elif isinstance(node, docutils.nodes.block_quote):
        return textwrap.indent("".join(content), " ")
    elif isinstance(node, docutils.nodes.definition_list):
        return "\n".join(content) + "\n"
    elif isinstance(node, docutils.nodes.definition_list_item):
        # The term stays at column 0, the definition lines below it are
        # indented by three columns.
        return hanging("".join(content), "", "   ")
    elif isinstance(node, (docutils.nodes.section, docutils.nodes.definition,
                           docutils.nodes.list_item, docutils.nodes.term,
                           docutils.nodes.line)):
        return "".join(content).rstrip()
    else:
        # Unknown node type: report it on stderr so the diagnostic does not
        # end up inside the generated page on stdout.
        print(type(node), content, file=sys.stderr)
        return "".join(content)
def process_section(section, filename=None):
    """Split a section node into its upper-cased title and the remaining node.

    The first child of *section* (its title) is removed from the node, so
    the returned node only contains the section body.

    :param section: the node to process; a warning is emitted if it is not
        a ``docutils.nodes.section``.
    :param filename: name used as prefix of the warning.  When omitted, the
        module-level ``filename`` bound by the script's main loop is used if
        it exists, then the node's ``source`` attribute.
    :returns: ``(TITLE, section)``; the title is an empty string for a node
        without children.
    """
    if not isinstance(section, docutils.nodes.section):
        if filename is None:
            # Historically this function read a bare global `filename`,
            # which only exists when the file is run as a script.  Keep that
            # as the first fallback, but do not fail with NameError when the
            # module is imported.
            filename = (
                globals().get("filename")
                or getattr(section, "source", None)
                or "<unknown>")
        emit_warning(
            filename,
            "unexpected non-section element: %r",
            section)

    # A non-section node may have no children at all; do not crash on it.
    if not section.children:
        return ("", section)

    title = section.children.pop(0)
    return (title.astext().upper(), section)
def process_dl(node):
    """Convert a definition list node into a list of ``(term, text)`` pairs.

    For every item the first child (the term) is removed from the item and
    rendered with rec_rawsource(); the renderings of the remaining children
    are concatenated to form the text.

    :raises ValueError: if *node* is not a ``docutils.nodes.definition_list``.
    """
    if not isinstance(node, docutils.nodes.definition_list):
        raise ValueError("node is not a dl")

    pairs = []
    for item in node.children:
        # Take the term off the item first, so that only the definition
        # part is left for the body text.
        term_node = item.children.pop(0)
        term = rec_rawsource(term_node)
        body = "".join(rec_rawsource(child) for child in item.children)
        pairs.append((term, body))
    return pairs
# Section titles such as "OPTIONAL MULTIPLE PARAMETERS" or
# "DEPRECATED BOOLEAN PARAMETERS".
_PARAMETER_TITLE_RE = re.compile(
    r"^\s*(DEPRECATED )?(BOOLEAN|REQUIRED|OPTIONAL)( MULTIPLE)? PARAMETERS\s*$")

# Placeholder text of a section that intentionally lists nothing.
_NONE_RE = re.compile(r"^none\.?$", flags=re.IGNORECASE)


def _list_entries(section):
    """Return the entries of a list-like section (AUTHORS, SEE ALSO).

    Bullet lists and line blocks contribute one entry per item; any other
    content is split at commas and newlines.
    """
    entries = []
    for child in section.children:
        if isinstance(child, (docutils.nodes.bullet_list,
                              docutils.nodes.line_block)):
            entries.extend(
                rec_rawsource(item).rstrip() for item in child.children)
        else:
            # HACK: comma/newline split
            text = rec_rawsource(child).rstrip().rstrip(".")
            # "a,\nb" splits into an empty piece between the two
            # separators; drop such pieces instead of emitting empty items.
            entries.extend(
                piece.strip()
                for piece in re.split(r", *|\n", text)
                if piece.strip())
    return entries


def _definitions(filename, section, title):
    """Yield the (term, text) pairs of all definition lists in *section*.

    A child that only says "None" is ignored; for any other child that is
    not a definition list a warning is emitted.
    """
    for child in section.children:
        if isinstance(child, docutils.nodes.definition_list):
            yield from process_dl(child)
        elif _NONE_RE.match(child.astext().strip()):
            continue
        else:
            emit_warning(
                filename, "non definition-list in %s section", title)


def parse_rst(filename):
    """Parse the man.rst file *filename* and print its normalised form.

    The type name is taken from the name of the directory containing the
    file.  Problems found while parsing are reported through
    emit_warning(); the re-serialised page is printed to stdout.  Files not
    named ``man.rst`` and empty documents are skipped after a warning.
    """
    (path, manrst) = os.path.split(filename)
    if manrst != "man.rst":
        emit_warning(filename, "file name is not man.rst, skipping file.")
        return
    # abspath: also resolve the type directory when only "man.rst" was given.
    type_name = os.path.basename(os.path.abspath(path))

    # get_default_settings() is used from docutils 0.19 on, the OptionParser
    # based variant for older versions.
    if docutils.__version_info__ >= (0, 19):
        settings = docutils.frontend.get_default_settings(
            docutils.parsers.rst.Parser())
    else:
        settings = docutils.frontend.OptionParser(
            components=(docutils.parsers.rst.Parser,)).get_default_values()

    # Fixed encoding so that the result does not depend on the locale.
    with open(filename, "r", encoding="utf-8") as fh:
        source_text = fh.read()
    document = docutils.utils.new_document(filename, settings=settings)
    docutils.parsers.rst.Parser().parse(source_text, document)

    if not document.children:
        emit_warning(filename, "document is empty, skipping file.")
        return
    if len(document.children) > 1:
        emit_warning(
            filename,
            "found %u sections in the document, will only process the first",
            len(document.children))

    top_section = document.children[0]

    # man page title
    if (top_section.children
            and isinstance(top_section.children[0], docutils.nodes.title)):
        page_title = top_section.children.pop(0)
        if page_title.astext() != ("cdist-type%s(7)" % (type_name)):
            emit_warning(
                filename,
                "man page title does not match expected format")

    man = ManStruct(type_name)

    for (title, elt) in map(process_section, top_section.children):
        if title == "NAME":
            name_text = rec_rawsource(elt)
            (name, separator, abstract) = name_text.partition(" - ")
            if not separator or name != ("cdist-type%s" % (type_name)):
                emit_warning(
                    filename,
                    "NAME section does not match expected format/value")
            # Without a " - " separator keep the whole text rather than
            # failing or losing it.
            man.abstract = abstract if separator else name_text.strip()
        elif title in ("COPYING", "DESCRIPTION", "EXAMPLES"):
            setattr(man, title.lower(), rec_rawsource(elt))
        elif title in ("AUTHOR", "AUTHORS"):
            man.authors += _list_entries(elt)
        elif title == "SEE ALSO":
            man.see_also += _list_entries(elt)
        elif title == "MESSAGES":
            for (k, v) in _definitions(filename, elt, title):
                if k in man.messages:
                    emit_warning(
                        filename,
                        "multiple definitions of message %s", k)
                man.messages[k] = v
        else:
            param_match = _PARAMETER_TITLE_RE.match(title)
            if param_match:  # PARAMETER
                for (k, v) in _definitions(filename, elt, title):
                    if k in man.parameters:
                        emit_warning(
                            filename,
                            "multiple definitions of parameter %s", k)
                    man.parameters[k] = {
                        "deprecated": bool(param_match.group(1)),
                        "multiple": bool(param_match.group(3)),
                        "type": param_match.group(2).lower(),
                        "description": v,
                    }
            else:
                emit_warning(filename, "found custom section: %s", title)
                man.custom_sections[title] = rec_rawsource(elt)

    print(man.to_rst())
# Script entry point: lint every file given on the command line.
if __name__ == "__main__":
    # NOTE(review): this loop binds `filename` at module scope.  Check
    # process_section before renaming the variable or moving the loop into a
    # function -- a version of that function that reads a bare `filename`
    # depends on this global.
    for filename in sys.argv[1:]:
        parse_rst(filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment