Skip to content

Instantly share code, notes, and snippets.

@palevell
Last active May 7, 2023 18:07
Show Gist options
  • Save palevell/d366a21b4477bc18c1a56c218cd7cc93 to your computer and use it in GitHub Desktop.
Save palevell/d366a21b4477bc18c1a56c218cd7cc93 to your computer and use it in GitHub Desktop.
Parses message.content of OpenAI chat responses
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# markdown_parser.py - Sunday, May 7, 2023
# Via ChatGPT
import os
import re
import sys
import argparse
import subprocess
from bs4 import BeautifulSoup
def _fence_language(info):
    """Return the lower-cased language tag from a fence info string, or ""."""
    words = info.split()
    if not words:
        return ""
    tag = words[0].lower()
    # The tag is used as a file extension, so reject anything containing
    # path separators, dots or other characters that make an unsafe name.
    return tag if re.fullmatch(r"[a-z0-9+#_-]+", tag) else ""


def _linked_local_file(tags, attribute, suffix):
    """Return the basename of the first local file ending in suffix, else None."""
    for tag in tags:
        target = tag.get(attribute)
        if not isinstance(target, str) or not target:
            continue
        # Remote assets (CDN links) are not defined by a code block in the
        # Markdown file, so they must not become an output filename.
        if "://" in target or target.startswith("//"):
            continue
        if target.lower().endswith(suffix):
            return os.path.basename(target)
    return None


def parse_markdown_file(md_file_path):
    """Extract fenced code blocks from a Markdown file into separate files.

    Files are written to the current working directory:

    * The first HTML block is saved as ``index.html``; later HTML blocks are
      saved as ``<block index>.html``.
    * CSS and JavaScript blocks are saved under the local file name that the
      most recent HTML block links to (``<link rel="stylesheet" href=...>``
      and ``<script src=...>``), falling back to ``style.css`` and
      ``script.js``.
    * Any other block is saved as ``<block index>.<language>``, or with the
      ``.unknown`` extension when the fence has no usable language tag.

    After all blocks are saved, the ``file`` utility is run on every
    ``.unknown`` file written by this call and its output is printed.

    Args:
        md_file_path: Path of the Markdown file to read (decoded as UTF-8).

    Raises:
        OSError: If the Markdown file cannot be read or an output file
            cannot be written.
    """
    with open(md_file_path, "r", encoding="utf-8") as f:
        md_content = f.read()

    # Accept any info string after the opening fence (e.g. "c++",
    # "objective-c", "html "); a fence that fails to match would otherwise
    # make the scanner pair closing fences with the next opening fence.
    code_blocks = re.findall(r"```([^\n`]*)\n([\s\S]+?)\n```", md_content)

    css_filename = None
    js_filename = None
    html_saved = False      # whether index.html has been written yet
    unknown_files = []      # only the .unknown files created by this call

    for i, (info, code) in enumerate(code_blocks):
        lang = _fence_language(info)
        if lang in ("html", "htm"):
            soup = BeautifulSoup(code, "html.parser")
            linked_css = _linked_local_file(
                soup.find_all("link", {"rel": "stylesheet"}), "href", ".css"
            )
            if linked_css:
                css_filename = linked_css
            linked_js = _linked_local_file(
                soup.find_all("script", {"src": True}), "src", ".js"
            )
            if linked_js:
                js_filename = linked_js
            # "First HTML block", not "first block": earlier CSS/JS blocks
            # must not push the main page off index.html.
            filename = f"{i}.html" if html_saved else "index.html"
            html_saved = True
            label = "HTML code block"
        elif lang == "css":
            if not css_filename:
                css_filename = "style.css"
            filename = css_filename
            label = "CSS code block"
        elif lang in ("js", "javascript"):
            if not js_filename:
                js_filename = "script.js"
            filename = js_filename
            label = "JavaScript code block"
        else:
            extension = lang or "unknown"
            filename = f"{i}.{extension}"
            label = "code block"
            if extension == "unknown":
                unknown_files.append(filename)

        with open(filename, "w", encoding="utf-8") as f:
            f.write(code)
        print(f"Saved {label} to {filename}")

    for filename in unknown_files:
        try:
            output = subprocess.check_output(["file", filename])
        except (OSError, subprocess.CalledProcessError) as e:
            # OSError covers a missing "file" utility; keep going so the
            # remaining files are still reported.
            print(f"Error while determining file type for {filename}: {e}")
        else:
            print(f"{filename}: {output.decode().strip()}")
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path of
    # the Markdown file, and extracts its code blocks.
    cli = argparse.ArgumentParser(
        description="Parse Markdown file and extract code blocks"
    )
    cli.add_argument("markdown_file", help="Path to Markdown file to parse")
    parse_markdown_file(cli.parse_args().markdown_file)
I need a Python script that can parse a file that contains Markdown. The Markdown file contains code blocks that are delimited by three back-ticks (i.e., "```"). The opening delimiter may contain the name of the language immediately after the back-ticks. In this case, the entire code block should be saved with a filename, using the language as the file extension. For example, a "```html" delimiter indicates the filename should have a ".html" extension. The first HTML file encountered can be saved as "index.html". The HTML code block may contain an href to a CSS file (e.g., "style.css" or "styles.css"). This name should be used when CSS code blocks are encountered. The HTML code block may also contain a src reference to a JavaScript file (e.g., "script.js" or "scripts.js"). This name should be used when JavaScript code blocks are encountered. Use the BeautifulSoup library to extract the filenames of linked CSS and/or JavaScript. If the opening delimiter lacks the name of the language, use your own best judgment in determining the language. If the language of the code block cannot be determined, the filename can use a ".unknown" extension. When finished processing the Markdown file, the script should check for files it saved with a ".unknown" extension and use the "file" utility to determine the contents of the file, and show the results.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment