Created
December 1, 2023 21:40
-
-
Save weltonrodrigo/51bd40db4eb3c1c546dee23394ff610f to your computer and use it in GitHub Desktop.
Pixar character generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse
import base64
import json
import mimetypes
import os

import requests
# NOTE: access to gpt-4-vision and dall-e 3 is necessary.

# OpenAI API key. Taken from the OPENAI_API_KEY environment variable when it
# is set and non-empty, so the secret does not have to be written into this
# file; otherwise the placeholder below is used and must be replaced by hand.
api_key = os.environ.get("OPENAI_API_KEY") or "<OPEN_API_KEY>"
# Function to encode the image | |
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is opened in binary mode and read in full; the base64 bytes
    are decoded to ``str`` so the result can be embedded directly in a
    JSON payload or a ``data:`` URL.

    Args:
        image_path: Path of the file to encode.

    Returns:
        The base64 encoding of the file's bytes, as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Function to detect the image type | |
def detect_image_type(image_path):
    """Return the image subtype of *image_path* for use in a data URL.

    The result is inserted after ``image/`` when the data URL is built, so
    it should be a MIME subtype rather than a raw file extension: ``.jpg``
    maps to ``jpeg`` and the lookup is case-insensitive (``.PNG`` gives
    ``png``).

    Args:
        image_path: Path (or bare file name) of the image.

    Returns:
        The subtype of the registered ``image/*`` MIME type for the file's
        extension. If the extension has no registered image type, the
        lower-cased extension without its leading dot is returned instead
        (an empty string when the path has no extension).
    """
    # Only the file name matters for the lookup; dropping the directory
    # part keeps path separators and drive letters out of guess_type().
    mime_type, encoding = mimetypes.guess_type(os.path.basename(image_path))
    # A non-None encoding means a wrapper such as ".gz": the bytes on disk
    # are then not the image itself, so do not report the inner type.
    if encoding is None and mime_type and mime_type.startswith("image/"):
        return mime_type.split("/", 1)[1]

    _, file_extension = os.path.splitext(image_path)
    return file_extension[1:].lower()
# Command-line interface: the input image and where to write the result.
parser = argparse.ArgumentParser(description='Image path and output path.')
parser.add_argument('image_path', help='Path to your image')
parser.add_argument('output_image_path', help='Path for the output image')
args = parser.parse_args()
image_path = args.image_path
output_image_path = args.output_image_path

# requests applies no timeout unless one is passed, so a stalled connection
# would hang the script forever. Generous, because generation can be slow.
REQUEST_TIMEOUT = 120  # seconds

# Prompt for the vision model: a template it adapts to the supplied image.
prompt = """
Adapt this text to reflect the image: [begin] A digital 3d pixar character of <age description> <gender description>
<ethnicity description including skin color> <eyes description> <gaze description> <scene description> <light description> <background description>. The
style should be reminiscent of 3D pixar poster with attention to texture and lighting, capturing the
warmth and serenity of the scene. Make the 3d characters whimsical and fun, with joyful faces.[end]
"""

# The image is sent inline as a base64 data URL, which needs its type.
image_type = detect_image_type(image_path)
base64_image = encode_image(image_path)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

payload = {
    "model": "gpt-4-vision-preview",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/{image_type};base64,{base64_image}",
                    },
                },
            ],
        },
    ],
    "max_tokens": 4096,
}

# Step 1: have the vision model turn the template into a description of the
# supplied image.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=REQUEST_TIMEOUT,
)
# Fail with the HTTP error instead of a KeyError on the missing "choices" key.
response.raise_for_status()
new_prompt = response.json()["choices"][0]["message"]["content"]
print(f"Generated Prompt: {new_prompt}")

# Step 2: feed the generated description to DALL-E 3 to create a new image.
response = requests.post(
    'https://api.openai.com/v1/images/generations',
    headers=headers,
    json={
        "model": "dall-e-3",
        "prompt": new_prompt,
        "n": 1,
        "size": "1024x1024",
    },
    timeout=REQUEST_TIMEOUT,
)
response.raise_for_status()
# Parse the body once and read both fields from the same entry.
generated_image = response.json()["data"][0]
image_url = generated_image["url"]
revised_prompt = generated_image["revised_prompt"]
print(f"Revised Prompt: {revised_prompt}")

# Step 3: download the generated image and save it to the output path.
image_response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
# Without this check an HTTP error page would be written out as the image.
image_response.raise_for_status()
image_data = image_response.content
with open(output_image_path, 'wb') as handler:
    handler.write(image_data)
print(f"Image successfully saved at {output_image_path}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment