Skip to content

Instantly share code, notes, and snippets.

@weltonrodrigo
Created December 1, 2023 21:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save weltonrodrigo/51bd40db4eb3c1c546dee23394ff610f to your computer and use it in GitHub Desktop.
Save weltonrodrigo/51bd40db4eb3c1c546dee23394ff610f to your computer and use it in GitHub Desktop.
Pixar character generator
#!/usr/bin/env python3
import base64
import requests
import argparse
import os
import json
""" Access to gpt-4-vision and dall-e 3 is necessary """
# OpenAI API key.
# Read from the OPENAI_API_KEY environment variable so the secret does not
# have to be committed to the source file; if the variable is unset or empty,
# fall back to the original placeholder (which must then be edited by hand).
api_key = os.environ.get("OPENAI_API_KEY") or "<OPEN_API_KEY>"
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        # b64encode yields bytes; decode so the result can be embedded in
        # an f-string / JSON value.
        return base64.b64encode(image_file.read()).decode('utf-8')
# Function to detect the image type
def detect_image_type(image_path):
    """Return the image subtype implied by the extension of *image_path*.

    The result is meant to be placed after ``image/`` in a media type, so
    the extension is lower-cased and the ``jpg``/``jpe`` spellings are
    normalised to ``jpeg`` (the registered subtype name). Any other
    extension is returned unchanged apart from lower-casing.

    Args:
        image_path: Path (or bare file name) of the image.

    Returns:
        The extension without its leading dot, normalised as described
        above, or an empty string when the path has no extension.
    """
    _, file_extension = os.path.splitext(image_path)
    subtype = file_extension[1:].lower()
    # "jpg"/"jpe" are only file extensions; the media type is image/jpeg.
    jpeg_aliases = {"jpg": "jpeg", "jpe": "jpeg"}
    return jpeg_aliases.get(subtype, subtype)
# Command line interface: two required positional arguments, the input image
# and the destination of the generated image.
parser = argparse.ArgumentParser(description='Image path and output path.')
for arg_name, arg_help in (
    ('image_path', 'Path to your image'),
    ('output_image_path', 'Path for the output image'),
):
    parser.add_argument(arg_name, help=arg_help)
args = parser.parse_args()
image_path, output_image_path = args.image_path, args.output_image_path
# Prompt for the vision model.
# It asks the model to adapt the template between [begin] and [end] so that
# it reflects the supplied image; the <...> tokens are placeholders the model
# is expected to replace with descriptions of what it sees.
prompt = """
Adapt this text to reflect the image: [begin] A digital 3d pixar character of <age description> <gender description>
<ethnicity description including skin color> <eyes description> <gaze description> <scene description> <light description> <background description>. The
style should be reminiscent of 3D pixar poster with attention to texture and lighting, capturing the
warmth and serenity of the scene. Make the 3d characters whimsical and fun, with joyful faces.[end]
"""
# Image subtype (from the file extension) and base64 body for the data URL.
image_type = detect_image_type(image_path)
base64_image = encode_image(image_path)

# JSON request authenticated with the API key as a bearer token.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# One user message made of two parts: the text instructions and the image
# itself, inlined as a base64 data URL.
text_part = {"type": "text", "text": prompt}
image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/{image_type};base64,{base64_image}"},
}
payload = {
    "model": "gpt-4-vision-preview",
    "messages": [{"role": "user", "content": [text_part, image_part]}],
    "max_tokens": 4096,
}
# Send the template prompt and the image to the chat completions endpoint.
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
# Surface HTTP failures (bad key, no model access, rate limit, ...) as an
# HTTPError here rather than as an opaque KeyError on "choices" below.
response.raise_for_status()
# The adapted prompt is the text content of the first returned choice.
new_prompt = response.json()["choices"][0]["message"]["content"]
print(f"Generated Prompt: {new_prompt}")
# Now, this new prompt is fed into the DALL-E 3 model to create a new image
response = requests.post(
    'https://api.openai.com/v1/images/generations',
    headers=headers,
    json={
        "model": "dall-e-3",
        "prompt": new_prompt,
        "n": 1,
        "size": "1024x1024",
    },
)
# Surface HTTP failures as an HTTPError instead of a KeyError on "data".
response.raise_for_status()
# Parse the body once; only one image was requested (n=1), so take the
# first entry.
generated_image = response.json()["data"][0]
image_url = generated_image["url"]
revised_prompt = generated_image["revised_prompt"]
print(f"Revised Prompt: {revised_prompt}")
# Save image to the output path
image_response = requests.get(image_url)
# Do not write an error page to disk as if it were the image.
image_response.raise_for_status()
image_data = image_response.content
with open(output_image_path, 'wb') as handler:
    handler.write(image_data)
print(f"Image successfully saved at {output_image_path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment