Created
May 22, 2023 05:24
-
-
Save rdegges/e5adc01b91ccd99430c79d7cbd4af982 to your computer and use it in GitHub Desktop.
Simple script to convert a podcast transcript into a blog post.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
trans-to-blog.py | |
~~~~~~~~~~~~~~~~ | |
A simple script that, given a podcast transcript file (in any format), will use OpenAI | |
to parse the transcript and create a blog post in Markdown format based on the | |
contents of the transcript. | |
Requirements | |
~~~~~~~~~~~~ | |
$ pip install openai | |
$ export OPENAI_API_KEY=<your-key-here> | |
Usage | |
~~~~~ | |
$ python trans-to-blog.py <path/to/transcript/file> | |
""" | |
import os | |
import openai | |
# Pull the OpenAI credential from the environment (set OPENAI_API_KEY before running).
openai.api_key = os.environ.get("OPENAI_API_KEY")
def create_blog_post(transcript_path):
    """Read a podcast transcript and print a Markdown blog post built via OpenAI.

    The transcript is split into ~2048-character chunks; every chunk is fed
    into a running chat conversation and the assistant replies are
    concatenated into one blog post, printed to stdout.

    :param transcript_path: path to the transcript file (any plain-text format).
    :raises OSError: if the transcript file cannot be read.
    """
    # Read the transcript; collapse all whitespace (including newlines) to
    # single spaces.  The original replaced '\n' with '' which fused the last
    # word of one line with the first word of the next.
    with open(transcript_path, 'r', encoding='utf-8') as file:
        data = ' '.join(file.read().split())

    # Break the transcript into chunks small enough to submit per message.
    chunk_size = 2048
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]

    # Empty transcript: nothing to send; print the (empty) post and return,
    # matching the original's final print of the accumulated content.
    if not chunks:
        print('')
        return

    # Seed the conversation with the system prompt and the instruction that
    # carries the first transcript chunk.
    conversation_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Create an interesting blog post in Markdown format based on the following snippet of a podcast transcript. Please note that the blog post should be highlight interesting parts of the transcript. Also please note that this is likely a snippet of a transcript, not the entire transcript. Here's the transcript: " + chunks[0]}
    ]

    blog_post_content = ''
    # Iterate over EVERY chunk.  The original looped over chunks[1:], so a
    # transcript that fit into a single chunk never triggered an API call and
    # the script printed an empty string.
    for index, chunk in enumerate(chunks):
        if index > 0:
            # Chunks after the first are appended as plain follow-up messages.
            conversation_messages.append({"role": "user", "content": chunk})

        # Crude context trim: drop the OLDEST non-system message while the
        # running character count exceeds the budget.  The original popped
        # index 0, which evicted the system prompt and the blog-post
        # instruction first.  NOTE(review): characters are a rough proxy for
        # tokens; tiktoken would give an accurate count — confirm acceptable.
        while (sum(len(message["content"]) for message in conversation_messages) > 4096
               and len(conversation_messages) > 2):
            conversation_messages.pop(1)

        # Continue the chat conversation with the assistant.
        conversation = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation_messages
        )
        reply = conversation['choices'][0]['message']['content']

        # Keep the assistant's reply in the conversation for context, and
        # accumulate it into the final blog post.
        conversation_messages.append({"role": "assistant", "content": reply})
        blog_post_content += reply

    # Print the Markdown formatted blog post.
    print(blog_post_content)
if __name__ == "__main__":
    import sys

    # The original indexed sys.argv[1] unconditionally, crashing with a bare
    # IndexError when no path was supplied; fail with a usage message instead.
    if len(sys.argv) < 2:
        print("Usage: python trans-to-blog.py <path/to/transcript/file>", file=sys.stderr)
        sys.exit(1)
    create_blog_post(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment