import os
import openai
import json
import frontmatter
import time
import requests

# pip install python-frontmatter requests openai

openai.api_key = os.getenv("OPENAI_API_KEY")
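
# Illustrative setup (the key value and the script filename are assumptions;
# the gist does not document how it is invoked):
#
#   export OPENAI_API_KEY="sk-..."
#   python generate_blog_post.py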


def generate_post_header(prompt):
    # Chat template string, used to generate the Markdown frontmatter header
    # for the blog post
    CHAT_TEMPLATE = """Assistant is an expert AWS blogger and Markdown assistant.
For each blog post title the user supplies, the assistant will reply with
(1) a blog post in markdown format with the following section populated appropriately at the top of the post:

---
title:
slug:
description:
date:
published: true
author: Nathan Glover
tags: ["tag1", "tag2"]
featuredImage: image/featured.png
ogImage: image/og.png
---

Human: {human_input}
Assistant:"""

    print("Generating blog post header...")
    header_text = openai.Completion.create(
        engine="text-davinci-003",
        prompt=CHAT_TEMPLATE.format(human_input=prompt),
        max_tokens=1900,
    ).choices[0].text

    # Strip leading/trailing whitespace
    header_text = header_text.strip()
    # Remove everything after the last "---"
    header_text = header_text[:header_text.rfind("---")]
    # Add the last "---" back to the text
    header_text += "\n---"
    return header_text
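
# For reference, a successful call returns a frontmatter block along these
# lines (the values here are invented for illustration, not output from the gist):
#
#   ---
#   title: Deploying a Serverless API on AWS
#   slug: deploying-a-serverless-api-on-aws
#   description: A hands-on walkthrough of building a serverless API on AWS.
#   date:
#   published: true
#   author: Nathan Glover
#   tags: ["aws", "serverless"]
#   featuredImage: image/featured.png
#   ogImage: image/og.png
#   ---
#
# date, featuredImage, and ogImage are overridden later in generate_post().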


def generate_post(model_name=None, output_dir="content/blog"):
    # Get input prompt from user
    prompt = input("Enter the prompt for your blog post:\n")

    # Returns the markdown header for the blog post
    generated_header = generate_post_header(prompt)

    # Generate blog post content; if no model_name is specified use the
    # default engine, otherwise use the fine-tuned model
    print("Generating blog post content...")
    if model_name is None:
        generated_text = openai.Completion.create(
            engine="text-davinci-003",
            prompt=prompt,
            temperature=0.2,
            max_tokens=3900
        ).choices[0].text
    else:
        generated_text = openai.Completion.create(
            model=model_name,
            prompt=prompt,
            temperature=0.2,
            max_tokens=3900
        ).choices[0].text

    # Load markdown
    generated_post = frontmatter.loads(generated_header + generated_text)

    # Override the date, featuredImage, and ogImage fields
    generated_post['date'] = time.strftime("%Y-%m-%d %H:%M:%S")
    generated_post['featuredImage'] = f"img/{generated_post['slug']}.jpg"
    generated_post['ogImage'] = f"img/{generated_post['slug']}-seo.jpg"

    # Save output to a file called index.mdx inside a folder named after the
    # slug in the output directory; create the folder and its img
    # subdirectory if they don't already exist
    os.makedirs(os.path.join(output_dir, generated_post['slug'], "img"),
                exist_ok=True)

    # Check if the file exists; if it does, prompt the user to overwrite
    if os.path.exists(os.path.join(output_dir, generated_post['slug'], "index.mdx")):
        overwrite = input(
            f"File {os.path.join(output_dir, generated_post['slug'], 'index.mdx')} exists. Overwrite? (y/n): ")
        if overwrite.lower() != "y":
            print("File not overwritten.")
            exit()

    with open(os.path.join(output_dir, generated_post['slug'], "index.mdx"), "w") as f:
        f.write(frontmatter.dumps(generated_post))

    # Generate an image for the post
    print("Generating image for blog post...")
    image_url = openai.Image.create(
        prompt=prompt,
        n=1,
        size="1024x1024"
    )['data'][0]['url']
    response = requests.get(image_url)
    response.raise_for_status()
    with open(os.path.join(output_dir, generated_post['slug'], "img", f"{generated_post['slug']}.jpg"), 'wb') as f:
        f.write(response.content)
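
# Illustrative on-disk result for a post whose slug is "my-example-post"
# (the slug is hypothetical). The image is saved with a .jpg extension so it
# matches the featuredImage path set in the frontmatter:
#
#   content/blog/my-example-post/index.mdx                 frontmatter + generated body
#   content/blog/my-example-post/img/my-example-post.jpg   the generated image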


def generate_post_list():
    MAX_LENGTH = 2048
    blogs_dir = os.path.join(os.getcwd(), 'content', 'blog')

    # Create a JSONL file to write all the posts to
    with open('/tmp/posts.jsonl', 'w') as all_posts_file:
        # Loop through all subdirectories in content/blog
        for blog_name in os.listdir(blogs_dir):
            blog_dir = os.path.join(blogs_dir, blog_name)
            index_file = os.path.join(blog_dir, 'index.mdx')
            # If the index file exists, read its contents and write a JSONL entry for it
            if os.path.exists(index_file):
                with open(index_file, 'r') as f:
                    post = frontmatter.load(f)
                # Truncate prompt and completion to MAX_LENGTH combined characters if necessary
                combined_len = len(post['title']) + len(post.content)
                if combined_len > MAX_LENGTH:
                    content_len = min(len(post.content),
                                      MAX_LENGTH - len(post['title']) - 1)
                    post.content = post.content[:content_len] + '…'
                # Write a JSONL line for each post
                json_str = json.dumps(
                    {"prompt": post['title'], "completion": str(post.content)})
                all_posts_file.write(f"{json_str}\n")
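
# Each line of /tmp/posts.jsonl is a prompt/completion pair in the format the
# fine-tuning endpoint expects; an invented example line:
#
#   {"prompt": "Deploying a Serverless API on AWS", "completion": "In this post we will..."}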


def upload_fine_tune():
    # Upload the training file, then start a fine-tune job against it
    response = openai.File.create(
        file=open("/tmp/posts.jsonl", "rb"),
        purpose='fine-tune'
    )
    fine_tune = openai.FineTune.create(
        training_file=response.id
    )
    # Return the fine-tune job id (not the file id) so the caller can poll it
    return fine_tune.id
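
# Fine-tuning is asynchronous: openai.FineTune.retrieve(id=...) reports a
# status ("pending", "running", "succeeded", ...), and once the job succeeds
# its 'fine_tuned_model' field holds the model name to pass to
# openai.Completion.create(model=...), which is what __main__ does below.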


if __name__ == "__main__":
    # Defaults to not using a fine-tuned model unless USE_FINE_TUNED_MODEL is
    # set to "true". Environment variables are strings, so compare explicitly
    # rather than relying on truthiness (the string "False" would be truthy).
    FINE_TUNED_MODEL_ID = os.getenv("FINE_TUNED_MODEL_ID", None)
    USE_FINE_TUNED_MODEL = os.getenv(
        "USE_FINE_TUNED_MODEL", "false").lower() == "true"

    if USE_FINE_TUNED_MODEL and FINE_TUNED_MODEL_ID is None:
        print("Generating fine-tuned model...")
        generate_post_list()
        fine_tune_id = upload_fine_tune()
        while openai.FineTune.retrieve(id=fine_tune_id)['status'] != 'succeeded':
            print("Waiting for fine-tuned model to be ready...")
            time.sleep(10)
        model_name = openai.FineTune.retrieve(id=fine_tune_id)['fine_tuned_model']
        generate_post(model_name)
    elif USE_FINE_TUNED_MODEL and FINE_TUNED_MODEL_ID is not None:
        print("Using fine-tuned model...")
        model_name = openai.FineTune.retrieve(id=FINE_TUNED_MODEL_ID)[
            'fine_tuned_model']
        generate_post(model_name)
    else:
        print("Using default model...")
        generate_post()
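
# Illustrative invocations (the script filename and the ft- id are placeholders):
#
#   # Generate a post with the base text-davinci-003 model
#   python generate_blog_post.py
#
#   # Build /tmp/posts.jsonl from existing posts, fine-tune, then generate
#   USE_FINE_TUNED_MODEL=true python generate_blog_post.py
#
#   # Reuse a previously created fine-tune job
#   USE_FINE_TUNED_MODEL=true FINE_TUNED_MODEL_ID=ft-abc123 python generate_blog_post.py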