Created
June 20, 2023 20:01
-
-
Save sgugger/c04ff49cbbd6696a3c4bc6d86c8d1644 to your computer and use it in GitHub Desktop.
Convert all MDX to MD in the doc folder of a HF repo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import re | |
import shutil | |
from pathlib import Path | |
from tqdm import tqdm | |
# Notice that treat_doc_file writes into the leading HTML comment of every
# converted file (or into a new comment when the file has none). The text spans
# two lines on purpose: the line break is part of the emitted notice.
DISCLAIMER = """⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer."""
def treat_doc_file(doc_file):
    """Rename one ``.mdx`` doc file to ``.md`` and add the viewer disclaimer.

    The file is moved to the same path with its trailing ``.mdx`` replaced by
    ``.md``. If the content starts (after optional whitespace) with an HTML
    comment, ``DISCLAIMER`` is inserted just before that comment's closing
    ``-->``; otherwise a new comment holding the disclaimer is prepended.

    Args:
        doc_file: Path (``str`` or path-like) of the file to convert. Paths
            that do not end in ``.mdx`` are ignored.

    Returns:
        None. The file is renamed and rewritten in place.
    """
    doc_file = str(doc_file)
    if not doc_file.endswith(".mdx"):
        return

    # Swap only the trailing extension: str.replace would also rewrite ".mdx"
    # inside a directory name and point the move at a path that does not exist.
    new_path = doc_file[: -len(".mdx")] + ".md"
    shutil.move(doc_file, new_path)

    with open(new_path, "r", encoding="utf-8") as f:
        content = f.read()

    lines = content.split("\n")
    closing_idx = None
    if re.search(r"^\s*<!--", content) is not None:
        # First line that ends the leading comment; stays None when the comment
        # is never closed, so we fall back to prepending instead of raising.
        closing_idx = next(
            (idx for idx, line in enumerate(lines) if line.rstrip().endswith("-->")),
            None,
        )

    if closing_idx is None:
        content = f"<!--{DISCLAIMER}\n-->\n\n" + content
    else:
        closing_line = lines[closing_idx]
        if closing_line.strip() == "-->":
            # Closing marker sits on its own line: slot the disclaimer above it.
            lines[closing_idx:closing_idx] = ["", DISCLAIMER]
        else:
            # Comment text shares the line with "-->" (e.g. "<!-- x -->"):
            # split the line so the disclaimer still lands inside the comment.
            head = closing_line.rstrip()[: -len("-->")].rstrip()
            lines[closing_idx : closing_idx + 1] = [head, "", DISCLAIMER, "-->"]
        content = "\n".join(lines)

    with open(new_path, "w", encoding="utf-8") as f:
        f.write(content)
if __name__ == "__main__":
    # Command-line entry point: convert every .mdx file found under --doc_folder.
    parser = argparse.ArgumentParser()
    # required=True so a missing flag yields a usage error instead of an opaque
    # TypeError from Path(None) further down.
    parser.add_argument("--doc_folder", type=str, required=True, help="Where the doc folder is.")
    args = parser.parse_args()

    doc_root = Path(args.doc_folder)
    if not doc_root.is_dir():
        # A mistyped folder would otherwise glob to nothing and exit silently.
        parser.error(f"--doc_folder is not a directory: {args.doc_folder}")

    # Collect the matches up front: files are renamed while we iterate, and a
    # list also lets tqdm show a total.
    all_doc_files = list(doc_root.glob("**/*.mdx"))
    for doc_file in tqdm(all_doc_files):
        treat_doc_file(doc_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment