# joswr1ght/test_badattributes.py

## test_badattributes.py
#!/usr/bin/env python3
# Using one or more adoc files, build a list of all document attributes,
# then search for typo'd or undefined references to the attributes.
#
# 2022-03-18
# Copyright (c) 2022 Joshua Wright <jwright@hasborg.com>
# LICENSE: GPL 3.0

import sys
import os
import re
import glob
from collections import namedtuple

# ANSI SGR escape sequences wrapped around the `file:line` location in each
# report line. They are spelled with an explicit \033 (ESC) escape so the
# control byte cannot be lost when the file is copied or rendered; without
# ESC a terminal prints the bracket codes literally instead of colouring.
COLORFILE = "\033[36m"  # cyan foreground
COLORRESET = "\033[0m"  # reset all attributes
# Tag placed at the start of every message this script prints.
PREFIX = "BADATTR: "

# Where a source line was seen: the file it came from and its 1-based line
# number within that file.
AdocRecord = namedtuple("AdocRecord", "filename loc")


def readFile(adocdata: dict, adoc: str, adocfiles: list):
    """
    Read the file specified in adoc, recording each of its lines in adocdata.

    Every source line (including its line ending) becomes a key in adocdata
    whose value is an AdocRecord with the file name and 1-based line number.
    If a line uses an `include::path[...]` directive, that file's contents
    are read as well, as long as it isn't already in the adocfiles list;
    newly discovered files are appended to adocfiles.

    Include targets are resolved relative to the directory of the file that
    contains the directive, so the result does not depend on the directory
    the script is started from. If no file exists at that location the target
    is used exactly as written (i.e. relative to the working directory).

    This function is recursive, calling itself when a new file is identified
    with `include`.

    Both adocdata and adocfiles are modified in place; adocdata is also
    returned. Errors from open() (for example FileNotFoundError for a missing
    include target) and UnicodeDecodeError for non-UTF-8 input propagate to
    the caller.

    NOTE: because the line text itself is the key, a line that occurs more
    than once keeps only the AdocRecord of the occurrence read last.
    """
    # Non-greedy target: stop at the first `[` so trailing text that happens
    # to contain brackets is not swallowed into the path.
    includere = r"include::(.+?)\["
    basedir = os.path.dirname(adoc)

    with open(adoc, encoding="utf-8") as adocfp:
        for loc, line in enumerate(adocfp, start=1):
            adocdata[line] = AdocRecord(adoc, loc)
            match = re.search(includere, line)
            if not match:
                continue

            # This line references another document; process it if it's not
            # already in adocfiles.
            includefile = match.group(1)
            candidate = os.path.join(basedir, includefile)
            if os.path.isfile(candidate):
                includefile = candidate
            if includefile not in adocfiles:
                adocfiles.append(includefile)
                readFile(adocdata, includefile, adocfiles)
    return adocdata


def findAttributes(lines):
    """
    Return the set of document attribute names defined in lines.

    lines is any iterable of adoc source lines. A line defines an attribute
    when it starts with `:name:` followed by whitespace. A set is returned so
    the membership tests made while checking references are constant time.
    """
    attrre = re.compile(r"^:(\w+):\s")
    return {match.group(1) for match in map(attrre.search, lines) if match}


def findBadReferences(adocdata, attr):
    """
    Return a list of `(record, ref)` tuples for undefined references.

    adocdata maps each source line to a record with `filename` and `loc`
    fields, and attr is the collection of defined attribute names. Every
    `{ref}` on a line is examined, not only the first one, and a tuple is
    produced for each ref that is missing from attr, in the order the lines
    and references are encountered.
    """
    refre = re.compile(r"{(\w+)}")
    return [
        (record, ref)
        for line, record in adocdata.items()
        for ref in refre.findall(line)
        if ref not in attr
    ]


if __name__ == "__main__":

    if (len(sys.argv) == 1):
        # No arguments; exit
        sys.exit(0)

    adocfiles = []
    for adoc in sys.argv[1:]:
        if (os.path.splitext(adoc)[1] != ".adoc"):
            print(f"{PREFIX}Skipping non-adoc file {adoc}")
            continue
        # We need to look at all files in this project directory to
        # identify the root-level adoc file, so glob the other adoc files
        # next to this one. os.path.join() keeps the pattern relative when
        # adoc has no directory part; plain concatenation would produce
        # '/*.adoc' and search the filesystem root instead.
        adocfiles.append(adoc)
        adocfiles += glob.glob(os.path.join(os.path.dirname(adoc), "*.adoc"))

    # Make the list unique. dict.fromkeys() keeps first-seen order, so the
    # files are read in the same order on every run.
    adocfiles = list(dict.fromkeys(adocfiles))

    # adocdata is a dictionary of `line:AdocRecord` where `line` is the adoc
    # file source line as the dictionary key
    adocdata = {}

    # Process each of the files identified on the command line or through
    # globbing. Iterate over a snapshot: readFile() appends included files to
    # adocfiles and reads them itself.
    for adoc in tuple(adocfiles):
        adocdata = readFile(adocdata, adoc, adocfiles)

    # Collect the document attributes `:foo:`, then look for references
    # that do not have a corresponding attribute.
    attr = findAttributes(adocdata)
    badrefs = findBadReferences(adocdata, attr)
    for lineattr, ref in badrefs:
        print(
            f"{PREFIX}Bad attribute in "
            f"{COLORFILE}{os.path.basename(lineattr.filename)}:{lineattr.loc}"
            f"{COLORRESET}: {{{ref}}}")

    # Only the low 8 bits of an exit status reach the parent process, so an
    # unclamped count of 256 would be reported as 0 (success).
    sys.exit(min(len(badrefs), 255))
	#!/usr/bin/env python3
	# Using one or more adoc files, build a list of all document attributes,
	# then search for typo'd or undefined references to the attributes.
	#
	# 2022-03-18
	# Copyright (c) 2022 Joshua Wright <jwright@hasborg.com>
	# LICENSE: GPL 3.0

	import sys
	import os
	import re
	import glob
	from collections import namedtuple

	# ANSI SGR escape sequences wrapped around the `file:line` location in each
	# report line. They are spelled with an explicit \033 (ESC) escape so the
	# control byte cannot be lost when the file is copied or rendered; without
	# ESC a terminal prints the bracket codes literally instead of colouring.
	COLORFILE = "\033[36m"  # cyan foreground
	COLORRESET = "\033[0m"  # reset all attributes
	# Tag placed at the start of every message this script prints.
	PREFIX = "BADATTR: "

	# Where a source line was seen: the file it came from and its 1-based line
	# number within that file.
	AdocRecord = namedtuple("AdocRecord", "filename loc")


	def readFile(adocdata: dict, adoc: str, adocfiles: list):
	    """
	    Read the file specified in adoc, recording each of its lines in adocdata.

	    Every source line (including its line ending) becomes a key in adocdata
	    whose value is an AdocRecord with the file name and 1-based line number.
	    If a line uses an `include::path[...]` directive, that file's contents
	    are read as well, as long as it isn't already in the adocfiles list;
	    newly discovered files are appended to adocfiles.

	    Include targets are resolved relative to the directory of the file that
	    contains the directive, so the result does not depend on the directory
	    the script is started from. If no file exists at that location the
	    target is used exactly as written (i.e. relative to the working
	    directory).

	    This function is recursive, calling itself when a new file is identified
	    with `include`.

	    Both adocdata and adocfiles are modified in place; adocdata is also
	    returned. Errors from open() (for example FileNotFoundError for a
	    missing include target) and UnicodeDecodeError for non-UTF-8 input
	    propagate to the caller.

	    NOTE: because the line text itself is the key, a line that occurs more
	    than once keeps only the AdocRecord of the occurrence read last.
	    """
	    # Non-greedy target: stop at the first `[` so trailing text that happens
	    # to contain brackets is not swallowed into the path.
	    includere = r"include::(.+?)\["
	    basedir = os.path.dirname(adoc)

	    with open(adoc, encoding="utf-8") as adocfp:
	        for loc, line in enumerate(adocfp, start=1):
	            adocdata[line] = AdocRecord(adoc, loc)
	            match = re.search(includere, line)
	            if not match:
	                continue

	            # This line references another document; process it if it's
	            # not already in adocfiles.
	            includefile = match.group(1)
	            candidate = os.path.join(basedir, includefile)
	            if os.path.isfile(candidate):
	                includefile = candidate
	            if includefile not in adocfiles:
	                adocfiles.append(includefile)
	                readFile(adocdata, includefile, adocfiles)
	    return adocdata


	def findAttributes(lines):
	    """
	    Return the set of document attribute names defined in lines.

	    lines is any iterable of adoc source lines. A line defines an attribute
	    when it starts with `:name:` followed by whitespace. A set is returned
	    so the membership tests made while checking references are constant
	    time.
	    """
	    attrre = re.compile(r"^:(\w+):\s")
	    return {match.group(1) for match in map(attrre.search, lines) if match}


	def findBadReferences(adocdata, attr):
	    """
	    Return a list of `(record, ref)` tuples for undefined references.

	    adocdata maps each source line to a record with `filename` and `loc`
	    fields, and attr is the collection of defined attribute names. Every
	    `{ref}` on a line is examined, not only the first one, and a tuple is
	    produced for each ref that is missing from attr, in the order the lines
	    and references are encountered.
	    """
	    refre = re.compile(r"{(\w+)}")
	    return [
	        (record, ref)
	        for line, record in adocdata.items()
	        for ref in refre.findall(line)
	        if ref not in attr
	    ]


	if __name__ == "__main__":

	    if (len(sys.argv) == 1):
	        # No arguments; exit
	        sys.exit(0)

	    adocfiles = []
	    for adoc in sys.argv[1:]:
	        if (os.path.splitext(adoc)[1] != ".adoc"):
	            print(f"{PREFIX}Skipping non-adoc file {adoc}")
	            continue
	        # We need to look at all files in this project directory to
	        # identify the root-level adoc file, so glob the other adoc files
	        # next to this one. os.path.join() keeps the pattern relative when
	        # adoc has no directory part; plain concatenation would produce
	        # '/*.adoc' and search the filesystem root instead.
	        adocfiles.append(adoc)
	        adocfiles += glob.glob(
	            os.path.join(os.path.dirname(adoc), "*.adoc"))

	    # Make the list unique. dict.fromkeys() keeps first-seen order, so the
	    # files are read in the same order on every run.
	    adocfiles = list(dict.fromkeys(adocfiles))

	    # adocdata is a dictionary of `line:AdocRecord` where `line` is the adoc
	    # file source line as the dictionary key
	    adocdata = {}

	    # Process each of the files identified on the command line or through
	    # globbing. Iterate over a snapshot: readFile() appends included files
	    # to adocfiles and reads them itself.
	    for adoc in tuple(adocfiles):
	        adocdata = readFile(adocdata, adoc, adocfiles)

	    # Collect the document attributes `:foo:`, then look for references
	    # that do not have a corresponding attribute.
	    attr = findAttributes(adocdata)
	    badrefs = findBadReferences(adocdata, attr)
	    for lineattr, ref in badrefs:
	        print(
	            f"{PREFIX}Bad attribute in "
	            f"{COLORFILE}{os.path.basename(lineattr.filename)}:{lineattr.loc}"
	            f"{COLORRESET}: {{{ref}}}")

	    # Only the low 8 bits of an exit status reach the parent process, so an
	    # unclamped count of 256 would be reported as 0 (success).
	    sys.exit(min(len(badrefs), 255))