Skip to content

Instantly share code, notes, and snippets.

@LeoBakerHytch
Forked from thomasantony/chatgpt_parser_md.py
Created December 5, 2022 21:34
Show Gist options
  • Save LeoBakerHytch/9bb06eed57b779b5b59993734c76ca29 to your computer and use it in GitHub Desktop.
Save LeoBakerHytch/9bb06eed57b779b5b59993734c76ca29 to your computer and use it in GitHub Desktop.
Convert saved HTML transcripts from ChatGPT to Markdown
# Save the transcripts using the "Save Page WE" Chrome Extension
# This script was generated by ChatGPT
import sys
from bs4 import BeautifulSoup
# Speaker labels by turn parity: the transcript is assumed to alternate
# strictly, starting with the user (even index) followed by the assistant.
SPEAKERS = ("User", "Assistant")


def conversation_to_markdown_lines(messages):
    """Yield the Markdown output lines for a sequence of message texts.

    Each message is rendered as:
      * one line holding the speaker label ("User" for even positions,
        "Assistant" for odd positions),
      * one ``> ``-prefixed quote line per ``\\n``-separated line of the text
        (an empty line becomes ``"> "``),
      * one empty line separating it from the next message.

    Args:
        messages: iterable of message texts (str), in conversation order.

    Yields:
        The output lines, without trailing newlines.
    """
    for i, text in enumerate(messages):
        yield SPEAKERS[i % 2]
        # str.split always returns at least one element, so every message
        # gets at least one quote line after its speaker label.
        for line in text.split('\n'):
            yield f"> {line}"
        yield ""


def main(argv):
    """Convert a saved ChatGPT HTML transcript to Markdown on stdout.

    Args:
        argv: command line arguments; ``argv[1]`` is the path of the HTML file.

    Returns:
        Process exit status: 0 on success, 1 when no file argument was given.
    """
    # Check if a file was provided as a command line argument
    if len(argv) < 2:
        # Report on stderr so the message never ends up inside redirected
        # Markdown output.
        print("Please provide an HTML file as a command line argument.", file=sys.stderr)
        return 1

    # Read the HTML file. The encoding is explicit so the result does not
    # depend on the platform's locale default.
    # NOTE(review): assumes the saved page is UTF-8 -- confirm for your exporter.
    html_file = argv[1]
    with open(html_file, 'r', encoding='utf-8') as f:
        html = f.read()

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')

    # Find all the elements with the 'ConversationItem__ConversationItemWrapper-' class
    conversation_elements = soup.find_all(
        class_=lambda c: c and c.startswith('ConversationItem__ConversationItemWrapper-')
    )

    # Output the conversation as a Markdown quote
    messages = (element.get_text() for element in conversation_elements)
    for out_line in conversation_to_markdown_lines(messages):
        print(out_line)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment