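"""Generate an AI-written blog post (front matter, body, and featured image)
using the legacy OpenAI Completion, Image, and FineTune APIs.

Requires OPENAI_API_KEY to be set. Optionally set USE_FINE_TUNED_MODEL=true
(and, if one already exists, FINE_TUNED_MODEL_ID) to generate the post body
with a model fine-tuned on existing posts under content/blog/.
"""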
import json
import os
import time

import frontmatter
import openai
import requests

# pip install python-frontmatter requests openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_post_header(prompt):
    # Chat template string, used to prompt the model for the post's markdown front matter
    CHAT_TEMPLATE = """Assistant is an expert AWS blogger and Markdown assistant.
For each blog post title the user supplies, the assistant will reply with
(1) a blog post in markdown format with the following section populated appropriately at the top of the post:

---
title:
slug:
description:
date:
published: true
author: Nathan Glover
tags: ["tag1", "tag2"]
featuredImage: image/featured.png
ogImage: image/og.png
---

Human: {human_input}
Assistant:"""

    print("Generating blog post header...")
    header_text = openai.Completion.create(
        engine="text-davinci-003",
        prompt=CHAT_TEMPLATE.format(human_input=prompt),
        max_tokens=1900,
    ).choices[0].text
    # Strip leading/trailing whitespace
    header_text = header_text.strip()
    # Remove everything after the last "---"
    header_text = header_text[:header_text.rfind("---")]
    # Add the closing "---" back to the text
    header_text += "\n---"
    return header_text
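
# Example of the front matter block generate_post_header() is expected to
# return (illustrative values only; the model fills in the real fields):
#
# ---
# title: Some Blog Post Title
# slug: some-blog-post-title
# description: A short summary of the post.
# date:
# published: true
# author: Nathan Glover
# tags: ["tag1", "tag2"]
# featuredImage: image/featured.png
# ogImage: image/og.png
# ---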

def generate_post(model_name=None, output_dir="content/blog"):
    # Get input prompt from user
    prompt = input("Enter the prompt for your blog post:\n")

    # Returns the markdown header for the blog post
    generated_header = generate_post_header(prompt)

    # Generate blog post content
    # If no model_name is specified use the default engine, otherwise use the fine-tuned model
    print("Generating blog post content...")
    if model_name is None:
        generated_text = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            temperature=0.2,
            max_tokens=3900
        ).choices[0].text
    else:
        generated_text = openai.Completion.create(
            model=model_name,
            prompt=prompt,
            temperature=0.2,
            max_tokens=3900
        ).choices[0].text

    # Load the combined header and body as a frontmatter post
    generated_post = frontmatter.loads(generated_header + generated_text)

    # Override the date, featuredImage, and ogImage fields
    generated_post['date'] = time.strftime("%Y-%m-%d %H:%M:%S")
    generated_post['featuredImage'] = f"img/{generated_post['slug']}.jpg"
    generated_post['ogImage'] = f"img/{generated_post['slug']}-seo.jpg"

    # Save output to a file called index.mdx inside a folder named after the slug
    post_dir = os.path.join(output_dir, generated_post['slug'])
    # Create the post directory and its img subdirectory if they don't exist yet
    os.makedirs(os.path.join(post_dir, "img"), exist_ok=True)

    # Check if the file exists; if it does, prompt the user to overwrite
    index_path = os.path.join(post_dir, "index.mdx")
    if os.path.exists(index_path):
        overwrite = input(f"File {index_path} exists. Overwrite? (y/n): ")
        if overwrite.lower() != "y":
            print("File not overwritten.")
            exit()
    with open(index_path, "w") as f:
        f.write(frontmatter.dumps(generated_post))

    # Generate image for the post
    print("Generating image for blog post...")
    image_url = openai.Image.create(
        prompt=prompt,
        n=1,
        size="1024x1024"
    )['data'][0]['url']
    response = requests.get(image_url)
    response.raise_for_status()
    with open(os.path.join(post_dir, "img", f"{generated_post['slug']}.jpg"), 'wb') as f:
        f.write(response.content)
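
# Note: the URL returned by openai.Image.create points at PNG data, so the
# file written above is a PNG stored under a .jpg name; convert it (e.g. with
# Pillow) if a real JPEG is required.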

def generate_post_list():
    MAX_LENGTH = 2048
    blogs_dir = os.path.join(os.getcwd(), 'content', 'blog')

    # Create a JSONL file to write all the posts to
    with open('/tmp/posts.jsonl', 'w') as all_posts_file:
        # Loop through all subdirectories in content/blog
        for blog_name in os.listdir(blogs_dir):
            blog_dir = os.path.join(blogs_dir, blog_name)
            index_file = os.path.join(blog_dir, 'index.mdx')
            # If the index file exists, read its contents and write a JSONL entry for it
            if os.path.exists(index_file):
                with open(index_file, 'r') as f:
                    post = frontmatter.load(f)
                # Truncate prompt and completion to 2048 combined characters if necessary
                combined_len = len(post['title']) + len(post.content)
                if combined_len > MAX_LENGTH:
                    content_len = min(len(post.content),
                                      MAX_LENGTH - len(post['title']) - 1)
                    post.content = post.content[:content_len] + '…'
                # Write a JSONL string for each post to all_posts_file
                json_str = json.dumps(
                    {"prompt": post['title'], "completion": str(post.content)})
                all_posts_file.write(f"{json_str}\n")
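
# Each line in /tmp/posts.jsonl is a prompt/completion pair in the format the
# legacy fine-tuning endpoint expects, e.g. (illustrative values):
#   {"prompt": "Some Blog Post Title", "completion": "Post body text…"}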

def upload_fine_tune():
    # Upload the training data, then start a fine-tune job against it
    file_response = openai.File.create(
        file=open("/tmp/posts.jsonl", "rb"),
        purpose='fine-tune'
    )
    fine_tune_response = openai.FineTune.create(
        training_file=file_response.id
    )
    # Return the fine-tune job id (not the file id) so callers can poll its status
    return fine_tune_response.id

if __name__ == "__main__":
    # Defaults to not using a fine-tuned model unless USE_FINE_TUNED_MODEL is set to "true"
    FINE_TUNED_MODEL_ID = os.getenv("FINE_TUNED_MODEL_ID", None)
    # Environment variables are strings, so compare explicitly rather than relying on truthiness
    USE_FINE_TUNED_MODEL = os.getenv("USE_FINE_TUNED_MODEL", "false").lower() == "true"

    if USE_FINE_TUNED_MODEL and FINE_TUNED_MODEL_ID is None:
        print("Generating fine-tuned model...")
        generate_post_list()
        fine_tune_id = upload_fine_tune()
        # Poll the fine-tune job every 10 seconds until it reports success
        while openai.FineTune.retrieve(id=fine_tune_id)['status'] != 'succeeded':
            print("Waiting for fine-tuned model to be ready...")
            time.sleep(10)
        model_name = openai.FineTune.retrieve(id=fine_tune_id)['fine_tuned_model']
        generate_post(model_name)
    elif USE_FINE_TUNED_MODEL and FINE_TUNED_MODEL_ID is not None:
        print("Using fine-tuned model...")
        model_name = openai.FineTune.retrieve(id=FINE_TUNED_MODEL_ID)['fine_tuned_model']
        generate_post(model_name)
    else:
        print("Using default model...")
        generate_post()
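
# Example usage (the filename generate_post.py is an assumption for illustration):
#   export OPENAI_API_KEY=sk-...
#   python generate_post.py                            # default model
#   USE_FINE_TUNED_MODEL=true python generate_post.py  # build and use a fine-tuned model
#   USE_FINE_TUNED_MODEL=true FINE_TUNED_MODEL_ID=ft-... python generate_post.py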