Skip to content

Instantly share code, notes, and snippets.

@dbetm
Created April 4, 2023 22:55
Show Gist options
  • Save dbetm/8f0b8ca0b46e1efff3fa5f16ec7be9f1 to your computer and use it in GitHub Desktop.
Save dbetm/8f0b8ca0b46e1efff3fa5f16ec7be9f1 to your computer and use it in GitHub Desktop.
Currently there's an issue when generating a single page using the singlehtml Sphinx builder when there are sections with the same name; this is a temporary fix that you can use until the issue is fixed: https://github.com/sphinx-doc/sphinx/issues/4814
import argparse
from typing import Any
from bs4 import BeautifulSoup # pip install beautifulsoup4
def update_h3_child(section: Any, new_id: str) -> None:
    """Repoint the headerlink anchor of *section*'s first <h3> to ``new_id``.

    Args:
        section: BeautifulSoup Tag for a <section> element.
        new_id: the (possibly deduplicated) id the anchor should reference.
    """
    h3_first_child = section.find("h3")
    # Guard both a missing <h3> and an <h3> without an <a> child; the
    # original code raised on the latter (`None["href"]`).
    if h3_first_child is not None and h3_first_child.a is not None:
        h3_first_child.a["href"] = f"#{new_id}"
def deduplicate_anchors(filepath: str, verbose: bool = True) -> None:
    """Deduplicate section ids and toc-tree anchors in a `singlehtml` build.

    When Sphinx's `singlehtml` builder renders several sections with the
    same name it emits duplicate ids. This renames every duplicate to
    ``<id>-<n>`` (counting from 2), updates each section's <h3> headerlink,
    and rewrites the toc-tree anchors to match. The HTML file is
    overwritten in place.

    Temporary fix for the open Sphinx issue:
    https://github.com/sphinx-doc/sphinx/issues/4814

    Args:
        filepath: path of the generated HTML file (read and overwritten).
        verbose: when True, print the computed section-id map.
    """
    # Read and parse the HTML document.
    with open(filepath, "r", encoding="utf-8") as file:
        soup = BeautifulSoup(file.read(), "html.parser")

    # Map each original section id to the ordered list of (possibly
    # renamed) ids; duplicates get a numeric suffix starting at 2.
    sections_ids_map: dict = {}
    for section in soup.find_all(name="section", id=True):
        original_id = new_id = section.get("id")
        if original_id not in sections_ids_map:
            # First occurrence keeps its id untouched.
            sections_ids_map[original_id] = []
        else:
            new_id = f"{original_id}-{len(sections_ids_map[original_id]) + 1}"
            section["id"] = new_id
        sections_ids_map[original_id].append(new_id)
        update_h3_child(section, new_id)

    if verbose:
        print("section id's map: ", sections_ids_map)

    # Update the toc-tree: each internal anchor consumes the next renamed
    # id for its target, in document order.
    toc_tree_div = soup.find("div", class_="toc-tree")
    if toc_tree_div is not None:  # guard: page may lack a toc-tree
        filename = filepath.split("/")[-1]
        for a in toc_tree_div.find_all("a", class_="reference internal"):
            anchor_id = a["href"].split("#")[-1]
            # Truthiness check also guards pop(0) on an exhausted list
            # (more toc anchors than sections with that id).
            if sections_ids_map.get(anchor_id):
                # BUG FIX: the href must point at the HTML file itself;
                # `filename` was computed but never used in the original.
                a["href"] = f"{filename}#{sections_ids_map[anchor_id].pop(0)}"

    # Save the modified HTML back to the same file.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(soup.prettify())
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filepath", type=str, required=False, default="index.html"
    )
    args = parser.parse_args()
    try:
        deduplicate_anchors(args.filepath)
    except Exception as e:
        # Report the actual error (the original swallowed it and printed
        # a typo'd message with no detail).
        print(f"Failed to deduplicate anchors: {e}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment