Created
May 22, 2023 05:24
-
-
Save rdegges/e5adc01b91ccd99430c79d7cbd4af982 to your computer and use it in GitHub Desktop.
Simple script to convert a podcast transcript into a blog post.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
trans-to-blog.py | |
~~~~~~~~~~~~~~~~ | |
A simple script that, given a podcast transcript file (in any format), will use OpenAI | |
to parse the transcript and create a blog post in Markdown format based on the | |
contents of the transcript. | |
Requirements | |
~~~~~~~~~~~~ | |
$ pip install openai | |
$ export OPENAI_API_KEY=<your-key-here> | |
Usage | |
~~~~~ | |
$ python trans-to-blog.py <path/to/transcript/file> | |
""" | |
import os | |
import openai | |
# Pull the OpenAI credential from the environment (set OPENAI_API_KEY before running).
openai.api_key = os.environ.get("OPENAI_API_KEY")
def create_blog_post(transcript_path):
    """Read a podcast transcript and print a Markdown blog post built via OpenAI.

    The transcript is split into ~2048-character chunks; every chunk is fed
    into a running chat conversation and the assistant replies are
    concatenated into one blog post, printed to stdout.

    :param transcript_path: path to the transcript file (any plain-text format).
    :raises OSError: if the transcript file cannot be read.
    """
    # Read the transcript; collapse all whitespace (including newlines) to
    # single spaces.  The original replaced '\n' with '' which fused the last
    # word of one line with the first word of the next.
    with open(transcript_path, 'r', encoding='utf-8') as file:
        data = ' '.join(file.read().split())

    # Break the transcript into chunks small enough to submit per message.
    chunk_size = 2048
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

    # Empty transcript: nothing to send; print the (empty) post and return,
    # matching the original's final print of the accumulated content.
    if not chunks:
        print('')
        return

    # Seed the conversation with the system prompt and the instruction that
    # carries the first transcript chunk.
    conversation_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Create an interesting blog post in Markdown format based on the following snippet of a podcast transcript. Please note that the blog post should be highlight interesting parts of the transcript. Also please note that this is likely a snippet of a transcript, not the entire transcript. Here's the transcript: " + chunks[0]}
    ]

    blog_post_content = ''
    # Iterate over EVERY chunk.  The original looped over chunks[1:], so a
    # transcript that fit into a single chunk never triggered an API call and
    # the script printed an empty string.
    for index, chunk in enumerate(chunks):
        if index > 0:
            # Chunks after the first are appended as plain follow-up messages.
            conversation_messages.append({"role": "user", "content": chunk})

        # Crude context trim: drop the OLDEST non-system message while the
        # running character count exceeds the budget.  The original popped
        # index 0, which evicted the system prompt and the blog-post
        # instruction first.  NOTE(review): characters are a rough proxy for
        # tokens; tiktoken would give an accurate count — confirm acceptable.
        while (sum(len(message["content"]) for message in conversation_messages) > 4096
               and len(conversation_messages) > 2):
            conversation_messages.pop(1)

        # Continue the chat conversation with the assistant.
        conversation = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation_messages
        )
        reply = conversation['choices'][0]['message']['content']

        # Keep the assistant's reply in the conversation for context, and
        # accumulate it into the final blog post.
        conversation_messages.append({"role": "assistant", "content": reply})
        blog_post_content += reply

    # Print the Markdown formatted blog post.
    print(blog_post_content)
if __name__ == "__main__":
    import sys

    # The original indexed sys.argv[1] unconditionally, crashing with a bare
    # IndexError when no path was supplied; fail with a usage message instead.
    if len(sys.argv) < 2:
        print("Usage: python trans-to-blog.py <path/to/transcript/file>", file=sys.stderr)
        sys.exit(1)
    create_blog_post(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment