Created
April 4, 2023 22:55
-
-
Save dbetm/8f0b8ca0b46e1efff3fa5f16ec7be9f1 to your computer and use it in GitHub Desktop.
Currently there's an issue when generating a single page using the singlehtml Sphinx builder when there are sections with the same name. This is a temporary fix that you can use until the issue is resolved: https://github.com/sphinx-doc/sphinx/issues/4814
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from typing import Any | |
from bs4 import BeautifulSoup # pip install beautifulsoup4 | |
def update_h3_child(section: Any, new_id: str): | |
"""Find the first child of the given section, if it exists | |
then update its anchor with the given `new_id`.""" | |
h3_first_child = section.find("h3") | |
if h3_first_child: | |
h3_first_child.a["href"] = f"#{new_id}" | |
def deduplicate_anchors(filepath: str, verbose: bool = True) -> None:
    """Deduplicate section ids and toc-tree anchors of a `singlehtml` build.

    When the HTML was built with Sphinx's `singlehtml` builder and several
    sections share the same name, their ids (and the toc-tree links pointing
    at them) collide. This renames duplicates and rewrites the toc-tree links
    accordingly, overwriting the HTML file in place.

    Temporary fix for the current Sphinx issue:
    https://github.com/sphinx-doc/sphinx/issues/4814

    :param filepath: path to the HTML file to rewrite in place.
    :param verbose: when True, print the map of original -> renamed ids.
    """
    # Read the HTML file into a string.
    with open(filepath, "r") as file:
        html = file.read()

    # Parse the HTML document.
    soup = BeautifulSoup(html, "html.parser")

    # Map each original section id to the list of (possibly renamed) unique
    # ids, in document order; the toc-tree pass below consumes them via pop(0).
    sections_ids_map: dict = {}
    for section in soup.find_all(name="section", id=True):
        original_id = new_id = section.get("id")
        if original_id not in sections_ids_map:
            sections_ids_map[original_id] = []
        else:
            # Duplicate found: rename to "<id>-2", "<id>-3", ...
            new_id = f"{original_id}-{len(sections_ids_map[original_id]) + 1}"
            section["id"] = new_id
        sections_ids_map[original_id].append(new_id)
        update_h3_child(section, new_id)

    if verbose:
        print("section id's map: ", sections_ids_map)

    # Update the toc-tree links, consuming the renamed ids in document order.
    toc_tree_div = soup.find("div", class_="toc-tree")
    filename = filepath.split("/")[-1]
    for a in toc_tree_div.find_all("a", class_="reference internal"):
        anchor_id = a["href"].split("#")[-1]
        if anchor_id in sections_ids_map:
            # BUG FIX: point at this file's renamed anchor — the original
            # line embedded a literal placeholder where the computed
            # `filename` (otherwise unused) was clearly intended.
            a["href"] = f"{filename}#{sections_ids_map[anchor_id].pop(0)}"

    # Save the modified HTML back to the same file.
    with open(filepath, "w") as file:
        file.write(soup.prettify())
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filepath", type=str, required=False, default="index.html"
    )
    args = parser.parse_args()

    try:
        deduplicate_anchors(args.filepath)
    except Exception as e:
        # BUG FIX: `e` was bound but never used, so the failure reason was
        # silently discarded; also fixes the "deduplicated" typo.
        print(f"Failed to deduplicate anchors: {e}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment