-
-
Save LeoBakerHytch/9bb06eed57b779b5b59993734c76ca29 to your computer and use it in GitHub Desktop.
Convert saved HTML transcripts from ChatGPT to Markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Save the transcripts using the "Save Page WE" Chrome extension.
# This script was generated by ChatGPT.
import sys | |
from bs4 import BeautifulSoup | |
def render_turn(index, text):
    """Return one conversation turn formatted as a Markdown block quote.

    Args:
        index: Zero-based position of the turn in the transcript. Even
            positions are labelled "User" and odd ones "Assistant"
            (assumes the saved page strictly alternates, user first).
        text: Plain text of the turn; it is split on newlines.

    Returns:
        The speaker label on its own line followed by every line of
        ``text`` prefixed with "> ", terminated by a single newline.
    """
    speaker = "User" if index % 2 == 0 else "Assistant"
    quoted = "\n".join(f"> {line}" for line in text.split('\n'))
    return f"{speaker}\n{quoted}\n"


def main(argv=None):
    """Convert the HTML transcript named in ``argv[1]`` to Markdown on stdout.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 1 when no input file was given, 0 otherwise.
    """
    argv = sys.argv if argv is None else argv

    # Check if a file was provided as a command line argument.
    if len(argv) < 2:
        # stdout carries the Markdown output, so report usage errors on stderr.
        print("Please provide an HTML file as a command line argument.", file=sys.stderr)
        return 1

    # Read raw bytes so BeautifulSoup can detect the document encoding itself
    # instead of depending on the platform's default text encoding.
    with open(argv[1], 'rb') as f:
        markup = f.read()

    # Parse the HTML using BeautifulSoup.
    soup = BeautifulSoup(markup, 'html.parser')

    # Find all the elements with the 'ConversationItem__ConversationItemWrapper-' class.
    conversation_elements = soup.find_all(
        class_=lambda c: c and c.startswith('ConversationItem__ConversationItemWrapper-')
    )

    # Output the conversation as Markdown quotes; print() adds the blank
    # line that separates consecutive turns.
    for i, element in enumerate(conversation_elements):
        print(render_turn(i, element.get_text()))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment