Skip to content

Instantly share code, notes, and snippets.

@hunterhector
Last active June 27, 2022 18:58
Show Gist options
  • Save hunterhector/86bd69df4cfad969b66cefb54b461830 to your computer and use it in GitHub Desktop.
Save hunterhector/86bd69df4cfad969b66cefb54b461830 to your computer and use it in GitHub Desktop.
# code_block.py
#
# This script analyzes the source files with some markers to find
# the line numbers of the code blocks, and then uses markdown-autodoc
# to generate markdown files with the code blocks. One difference
# from using markdown-autodoc directly is that one doesn't have to
# hardcode the line numbers, so the source code can be modified
# freely without breaking the documentation.
#
# Usage: python code_block.py [markdown input] [markdown out] [source code]
#
# 1. Find comments in the source files, find the markers and write
# out a mapping from the markers to their path and line numbers.
# 2. Make a copy of the README files and replace the README side marker
# with the path and line numbers.
#
# Requires:
# - `comment_parser`
#
# Assumptions:
# - The source files should have correct extension so that the MIME
# types can be determined.
# - Have https://github.com/karolswdev/autodocs-markdown-docker docker
# ready.
# - The markdown output directory needs to be at the same level/structure as
# the input (i.e. from ./input/ to ./output/, not ./input/ to ./some/output/)
# - Relies on certain string patterns (such as `&`) to do the replacement
import subprocess
import os
import shutil
import sys
import re
import logging
import mimetypes
from typing import Dict, Iterator, List, Tuple
from comment_parser import parsers
from comment_parser import comment_parser as cp
from comment_parser.comment_parser import UnsupportedError
# Prefixes of the source-side comment markers that open and close a named code
# block; whatever follows the prefix in the comment is the block name.
COMMENT_MARKER_BEGIN = "SOURCE_MARKER_BEGIN_"
COMMENT_MARKER_END = "SOURCE_MARKER_END_"
# Regex for the opening markdown-autodoc comment; group 1 captures the text
# after `CODE:`. Written as a raw string so that `\(` and `\)` reach the regex
# engine as escapes instead of being (invalid) string escape sequences.
COMMENT_AUTODOC_BEGIN_PATTERN = r"<!-- MARKDOWN-AUTO-DOCS:START \(CODE:(.*?)\) -->"
# Setting up MIME type: register the shell-style comment parser for files
# whose MIME type is "application/x-sh".
cp.MIME_MAP.update({"application/x-sh": parsers.shell_parser})
def list_files(some_path: str) -> Iterator[str]:
    """Yield the path of every file found under a directory, recursively.

    Args:
        some_path (str): The root directory to walk.

    Yields:
        The path of each file, built by joining the directory reported by
        `os.walk` with the file name.
    """
    for folder, _, file_names in os.walk(some_path):
        yield from (os.path.join(folder, file_name) for file_name in file_names)
def scan_sources(
    source_dirs: List[str], strip_empty_line: bool
) -> Dict[str, Dict[str, str]]:
    """
    Given a list of paths, find all files under them and search for
    all the markers in the comments. Then return the mapping from each
    source file to the markers found in it and the line range that each
    pair of markers encloses.

    The marker prefixes are stored in the constants `COMMENT_MARKER_BEGIN`
    and `COMMENT_MARKER_END`; the text after the prefix is the block name.

    Note:
        - Cannot handle nested markers.
        - Support limited MIME types from
          https://github.com/jeanralphaviles/comment_parser
        - Use # style inline comments for all other file types now.

    For example, given this source file:

    .. code-block:: python

        import sys
        # SOURCE_MARKER_BEGIN_some_name
        print("Testing 1")
        print("Testing 2")
        # SOURCE_MARKER_END_some_name

    the mapping would look like:

    .. code-block:: python

        {
            "/absolute/path/to/source": {
                "some_name": "lines=2-4",
            }
        }

    In `lines=B-E`, `B` is the line number of the begin marker plus the
    number of stripped leading blank lines, and `E` is the line number of
    the end marker minus one, minus the number of stripped trailing blank
    lines.

    NOTE(review): with 1-based marker line numbers, `B` is the line just
    before the first kept line; confirm this is the convention the autodoc
    tool expects.

    Args:
        source_dirs (List[str]): List of directories to scan.
        strip_empty_line (bool): strip the empty lines at the start and end
            of the block.

    Returns:
        A mapping from the absolute path of each source file to a mapping
        from marker name to its `lines=B-E` location string.

    Raises:
        RuntimeError: If an end marker does not match the most recent begin
            marker, or if a block has no lines left.
    """
    marker_mapping: Dict[str, Dict[str, str]] = {}

    def take_block(
        text_file: str, start_lineno: int, end_lineno: int, strip_empty: bool
    ) -> Tuple[int, int]:
        """Return the (begin, end) numbers of a block, optionally trimmed."""
        empty_begins, empty_ends = 0, 0
        if strip_empty:
            with open(text_file, encoding="utf-8") as f:
                # Only the lines strictly between the two markers. Assuming
                # comment_parser reports 1-based line numbers, the end marker
                # sits at index `end_lineno - 1`; it must be excluded, or the
                # backward scan below stops on it immediately and never
                # counts any trailing blank line.
                all_lines = f.readlines()[start_lineno : end_lineno - 1]
            for line in all_lines:
                if line.strip():
                    break
                empty_begins += 1
            for line in reversed(all_lines):
                if line.strip():
                    break
                empty_ends += 1
        return start_lineno + empty_begins, end_lineno - 1 - empty_ends

    for src_dir in source_dirs:
        for fn in list_files(src_dir):
            mime_type, _ = mimetypes.guess_type(fn)
            if mime_type is None:
                # Just try to use regular shell style comment
                mime_type = "text/x-shellscript"
            try:
                # Name and line of the most recent begin marker in this file.
                block_name = ""
                lineno = -1
                for comment in cp.extract_comments(fn, mime_type):
                    ctext = comment.text().strip()
                    if ctext.startswith(COMMENT_MARKER_BEGIN):
                        block_name = ctext[len(COMMENT_MARKER_BEGIN):]
                        lineno = comment.line_number()
                    if ctext.startswith(COMMENT_MARKER_END):
                        block_name_end = ctext[len(COMMENT_MARKER_END):]
                        if block_name_end != block_name:
                            raise RuntimeError(
                                "Unbalanced comment markers, scanning %s, "
                                "found marker name [%s] at line %d , and [%s] at line %d."
                                % (
                                    fn,
                                    block_name,
                                    # Line of the begin marker (-1 if none seen).
                                    lineno,
                                    block_name_end,
                                    comment.line_number(),
                                )
                            )
                        block_begin, block_end = take_block(
                            fn, lineno, comment.line_number(), strip_empty_line,
                        )
                        if block_end <= block_begin:
                            raise RuntimeError("Incorrect code block line numbers.")
                        file_markers = marker_mapping.setdefault(
                            os.path.abspath(fn), {}
                        )
                        file_markers[block_name] = f"lines={block_begin}-{block_end}"
                logging.info("Parsing file %s of type %s", fn, mime_type)
            except UnicodeDecodeError:
                logging.info("Ignoring non-text file %s", fn)
            except UnsupportedError:
                logging.info("Ignoring unsupported file %s of type %s", fn, mime_type)
    return marker_mapping
def run_autodoc(markdown_path: str) -> None:
    """Run autodoc on the input file.

    Runs the `karolswdev/autodocs-markdown-docker` image with the current
    working directory mounted at `/data`, asking it to process the code
    blocks of the given markdown file.

    Args:
        markdown_path (str): The input markdown path.

    Raises:
        RuntimeError: If the command cannot be started or exits with a
            non-zero status.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; `os.getcwd()` stands in for the shell's `$(pwd)`.
    command = [
        "docker",
        "run",
        "-v",
        f"{os.getcwd()}:/data",
        "-it",
        "karolswdev/autodocs-markdown-docker",
        "-c",
        "code-block",
        "-o",
        markdown_path,
    ]
    logging.info("Running Auto Doc command:")
    logging.info(" ".join(command))
    try:
        status = subprocess.run(command, check=False).returncode
    except OSError as err:
        # E.g. docker is not installed; keep reporting failures as
        # RuntimeError like the non-zero exit path below.
        raise RuntimeError(
            f"Command run unsuccessful, could not start the command: {err}"
        ) from err
    if status != 0:
        raise RuntimeError(f"Command run unsuccessful, return status is {status}")
def prepare_markdown(
    markdown_path: str, copy_path: str, marker_dict: Dict[str, Dict[str, str]]
) -> bool:
    """Given a markdown file, make a copy that replaces the markers with the locations.

    In the copied file, the following replacement will happen:

    The string `<!-- MARKDOWN-AUTO-DOCS:START (CODE:src=path/to/source&some_name) -->`
    will be replaced, given the mapping
    `{"/abs/path/to/source": {"some_name": "lines=3-4"}}`, with
    `<!-- MARKDOWN-AUTO-DOCS:START (CODE:src=path/to/source&lines=3-4) -->`

    The source path in the markdown is resolved relative to the directory of
    `markdown_path`. If at least one line was replaced, autodoc is run on the
    copy.

    Args:
        markdown_path (str): The path to the input markdown file.
        copy_path (str): The path to copy the markdown to.
        marker_dict (Dict[str, Dict[str, str]]): A mapping from the absolute
            source path to a mapping from marker name to location string.

    Returns:
        A boolean value representing whether something is replaced.

    Raises:
        RuntimeError: If the referenced source file has no markers in
            `marker_dict`, or the referenced marker is not defined in it.
    """
    is_replaced = False
    with open(markdown_path, encoding="utf-8") as f, open(
        copy_path, "w", encoding="utf-8"
    ) as out:
        for line in f:
            matched = re.match(COMMENT_AUTODOC_BEGIN_PATTERN, line.strip())
            if not matched:
                out.write(line)
                continue
            if len(matched.groups()) > 1:
                raise RuntimeError(
                    f"Find more than one AUTODOC pattern at {markdown_path}"
                )
            marker_origin = matched.group(1)
            # Expected shape: "src=<path>&<marker name>".
            src_path_in_markdown, marker_label = marker_origin.split("=")[1].split(
                "&"
            )
            full_src_path = os.path.abspath(
                os.path.join(os.path.dirname(markdown_path), src_path_in_markdown)
            )
            if full_src_path not in marker_dict:
                raise RuntimeError(
                    f"Tags in a source file {full_src_path} "
                    f"cannot be found in the markdown file [{markdown_path}]."
                )
            file_markers = marker_dict[full_src_path]
            if marker_label not in file_markers:
                # Report an unknown marker name explicitly instead of letting
                # a bare KeyError escape.
                raise RuntimeError(
                    f"Marker [{marker_label}] is not defined in the source file "
                    f"{full_src_path}, but is referenced in the markdown file "
                    f"[{markdown_path}]."
                )
            out.write(
                line.replace("&" + marker_label, "&" + file_markers[marker_label])
            )
            is_replaced = True
    # Run autodoc only after the copy has been closed, so that everything
    # written above is on disk when the tool reads the file.
    if is_replaced:
        run_autodoc(copy_path)
    return is_replaced
def prepare_all_markdowns(
    markdown_dir: str, target_dir: str, marker_dict: Dict[str, str]
) -> Tuple[List[str], List[str], List[str]]:
    """Mirror a markdown directory into `target_dir`, resolving autodoc markers.

    Every file under `markdown_dir` is written to the same relative location
    under `target_dir`. Files ending in ".md" go through `prepare_markdown`;
    all other files are copied as they are.

    Note:
        - Find markdown files using the ".md" extension.

    Args:
        markdown_dir (str): input markdown directory.
        target_dir (str): output directory.
        marker_dict (Dict[str, str]): A mapping from the marker to the locations.

    Returns:
        Three lists of output paths:
        - The first one contains markdown files that are auto-replaced.
        - The second one contains other markdown files that are copied.
        - The third one contains other files (non-markdown) that are copied.
    """
    replaced: List[str] = []
    plain_markdowns: List[str] = []
    other_files: List[str] = []
    for current_dir, _, file_names in os.walk(markdown_dir):
        # Directory under the target that corresponds to the one being walked.
        mirrored_dir = os.path.join(
            target_dir, os.path.relpath(current_dir, markdown_dir)
        )
        os.makedirs(mirrored_dir, exist_ok=True)
        for file_name in file_names:
            origin = os.path.join(current_dir, file_name)
            destination = os.path.normpath(os.path.join(mirrored_dir, file_name))
            if not file_name.endswith(".md"):
                shutil.copyfile(origin, destination)
                other_files.append(destination)
            elif prepare_markdown(origin, destination, marker_dict):
                # Replacing auto-doc comments happened for this file.
                replaced.append(destination)
            else:
                plain_markdowns.append(destination)
    return replaced, plain_markdowns, other_files
if __name__ == "__main__":
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    # The markdown input and output directories are required; without this
    # check a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit(
            f"Usage: python {sys.argv[0]} "
            "[markdown input] [markdown out] [source code]"
        )
    # Scan the source files (every argument after the two markdown paths).
    mark_mapping = scan_sources(sys.argv[3:], strip_empty_line=True)
    # Copy the markdown tree, resolving markers, and report what was done.
    summaries = prepare_all_markdowns(sys.argv[1], sys.argv[2], mark_mapping)
    logging.info("%d markdown files get auto replaced.", len(summaries[0]))
    logging.info("%d markdown files copied.", len(summaries[1]))
    logging.info("%d other files copied.", len(summaries[2]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment