Skip to content

Instantly share code, notes, and snippets.

@joswr1ght
Created March 18, 2022 13:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joswr1ght/5a180a5d9365ccf4e6f678b2cbbec5c4 to your computer and use it in GitHub Desktop.
Save joswr1ght/5a180a5d9365ccf4e6f678b2cbbec5c4 to your computer and use it in GitHub Desktop.
Test one or more Asciidoc .adoc files for missing document attribute definitions
#!/usr/bin/env python3
# Using one or more adoc files, build a list of all document attributes,
# then search for typo'd or undefined references to the attributes.
#
# 2022-03-18
# Copyright (c) 2022 Joshua Wright <jwright@hasborg.com>
# LICENSE: GPL 3.0
import sys
import os
import re
import glob
from collections import namedtuple
# Report formatting. COLORFILE is emitted immediately before the `file:line`
# location in each bad-attribute report and COLORRESET immediately after it.
# Both are empty strings here, so nothing extra surrounds the location.
COLORFILE = COLORRESET = ""
# Tag placed at the start of every message this script prints.
PREFIX = "BADATTR: "
# Where a source line came from: the file it was read from and its 1-based
# line number within that file.
AdocRecord = namedtuple("AdocRecord", "filename loc")

# An include directive starts at the beginning of its line; the target is
# everything between `include::` and the first `[`.
_INCLUDE_RE = re.compile(r"^include::([^\[]+)\[")


def readFile(adocdata: dict, adoc: str, adocfiles: list) -> dict:
    """
    Read the file specified in adoc, recording each of its lines in adocdata.

    Every line (including its trailing newline) becomes a key in adocdata
    whose value is an AdocRecord naming the file and 1-based line number.
    Because the line text is the key, a line whose text was already recorded
    replaces the earlier record.

    If a line is an `include::target[...]` directive, the target is resolved
    relative to the directory of the file containing the directive. The
    included file is then read recursively, unless its path is already in
    adocfiles. Newly discovered paths are appended to adocfiles, which also
    stops include cycles from recursing forever.

    Parameters:
        adocdata: dictionary of `line: AdocRecord`, updated in place.
        adoc: path of the file to read.
        adocfiles: list of paths already known; updated in place.

    Returns:
        The same adocdata dictionary that was passed in.

    Raises:
        OSError: if adoc, or a file it includes, cannot be opened.
    """
    # Read everything up front so the handle is closed before we recurse
    # into any included files.
    with open(adoc, encoding="utf-8") as adocfp:
        lines = adocfp.readlines()

    basedir = os.path.dirname(adoc)
    for loc, line in enumerate(lines, start=1):
        adocdata[line] = AdocRecord(adoc, loc)
        match = _INCLUDE_RE.match(line)
        if not match:
            continue
        # This line references another document. Resolve the target against
        # the including file's directory (not the process working directory)
        # and normalize it so the same file spelled two ways compares equal.
        includefile = os.path.normpath(os.path.join(basedir, match.group(1)))
        known = any(os.path.normpath(p) == includefile for p in adocfiles)
        if not known:
            adocfiles.append(includefile)
            readFile(adocdata, includefile, adocfiles)
    return adocdata
# Attribute names begin with a word character and may also contain hyphens.
# A definition is `:name:` at the start of a line, followed by whitespace or
# the end of the line.
_ATTR_DEF_RE = re.compile(r"^:(\w[\w-]*):(?:\s|$)")
# A reference is `{name}` anywhere in a line.
_ATTR_REF_RE = re.compile(r"\{(\w[\w-]*)\}")


def findDefinedAttrs(lines) -> set:
    """
    Return the set of document attribute names defined by `:name:` entries.

    Parameters:
        lines: iterable of source lines (iterating the adocdata dictionary
            yields exactly that).
    """
    return {m.group(1) for m in map(_ATTR_DEF_RE.match, lines) if m}


def findBadRefs(adocdata: dict, defined) -> list:
    """
    Return `(record, name)` for every `{name}` reference not in defined.

    Every reference on a line is checked, not just the first. A name that
    appears more than once on the same line is reported once for that line.

    Parameters:
        adocdata: dictionary of `line: record`; records are returned as-is.
        defined: container of attribute names considered valid.
    """
    bad = []
    for line, record in adocdata.items():
        # dict.fromkeys de-duplicates while keeping left-to-right order.
        for ref in dict.fromkeys(_ATTR_REF_RE.findall(line)):
            if ref not in defined:
                bad.append((record, ref))
    return bad


if __name__ == "__main__":
    if len(sys.argv) == 1:
        # No arguments; exit
        sys.exit(0)

    adocfiles = []
    for adoc in sys.argv[1:]:
        if os.path.splitext(adoc)[1] != ".adoc":
            print(f"{PREFIX}Skipping non-adoc file {adoc}")
            continue
        # We need to look at all files in this project directory to
        # identify the root-level adoc file. Get the file dirname and glob
        # the other adoc files in this directory. os.path.join keeps the
        # pattern relative when the argument has no directory part, so a
        # bare filename does not glob the filesystem root.
        adocfiles.append(adoc)
        adocfiles += glob.glob(os.path.join(os.path.dirname(adoc), "*.adoc"))

    # Make the list unique while keeping a deterministic order.
    adocfiles = list(dict.fromkeys(adocfiles))

    # adocdata is a dictionary of `line:AdocRecord` where `line` is the adoc
    # file source line as the dictionary key. Iterate over a snapshot of the
    # file list because readFile appends the includes it discovers (and has
    # already read) to adocfiles.
    adocdata = {}
    for adoc in list(adocfiles):
        adocdata = readFile(adocdata, adoc, adocfiles)

    # Any attribute not defined in the scanned files is reported.
    defined = findDefinedAttrs(adocdata)
    badrefs = findBadRefs(adocdata, defined)
    for lineattr, ref in badrefs:
        print(
            f"{PREFIX}Bad attribute in "
            f"{COLORFILE}{os.path.basename(lineattr.filename)}:{lineattr.loc}"
            f"{COLORRESET}: {{{ref}}}")

    # The exit status is the number of problems, clamped to 255 so a large
    # count cannot wrap around to 0 ("success").
    sys.exit(min(len(badrefs), 255))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment