# weltonrodrigo/pixify.py

## pixify.py
#!/usr/bin/env python3
import base64
import requests
import argparse
import os
import json

""" Access to gpt-4-vision and dall-e 3 is necessary """

# OpenAI API key: taken from the OPENAI_API_KEY environment variable when it
# is set and non-empty, so the secret does not have to be written into this
# file; otherwise the placeholder below is used and must be replaced by hand.
api_key = os.environ.get("OPENAI_API_KEY") or "<OPEN_API_KEY>"

# Function to encode the image
def encode_image(image_path):
    """Read the file at *image_path* and return its contents as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Function to detect the image type
def detect_image_type(image_path):
    """Return the image subtype for *image_path*, inferred from its extension.

    The value is interpolated into a ``data:image/<type>;base64,...`` URL, so
    it is normalised to look like a MIME subtype: the extension is lower-cased
    and the common ``jpg`` spelling is mapped to the registered ``jpeg``.

    Args:
        image_path: Path to the image file; only its name is inspected, the
            file is not opened.

    Returns:
        The lower-cased extension without the leading dot (``"jpg"`` becomes
        ``"jpeg"``), or an empty string when the path has no extension.
    """
    _, file_extension = os.path.splitext(image_path)
    subtype = file_extension[1:].lower()
    # "image/jpg" is not a registered media type; the subtype is "jpeg".
    return "jpeg" if subtype == "jpg" else subtype

# Command-line interface: two required positional arguments, the input image
# and the destination of the generated image.
parser = argparse.ArgumentParser(
    description='Image path and output path.',
)
parser.add_argument(
    'image_path',
    help='Path to your image',
)
parser.add_argument(
    'output_image_path',
    help='Path for the output image',
)
args = parser.parse_args()

# Expose the parsed values as module-level names used by the rest of the script.
image_path, output_image_path = args.image_path, args.output_image_path

# Prompt for the vision model.
# It asks the model to adapt the template between [begin] and [end] so that it
# reflects the supplied image; the <... description> markers are the slots the
# model is expected to fill in. The model's reply is later used as the prompt
# for the image-generation request.
prompt = """
Adapt this text to reflect the image: [begin] A digital 3d pixar character of <age description> <gender description>
<ethnicity description including skin color> <eyes description> <gaze description> <scene description> <light description> <background description>. The
style should be reminiscent of 3D pixar poster with attention to texture and lighting, capturing the
warmth and serenity of the scene. Make the 3d characters whimsical and fun, with joyful faces.[end]
"""

# Build the vision request. The image travels inline as a base64 data URL whose
# subtype is derived from the input file's extension.
image_type = detect_image_type(image_path)
base64_image = encode_image(image_path)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

payload = {
    "model": "gpt-4-vision-preview",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/{image_type};base64,{base64_image}"
                    },
                },
            ],
        }
    ],
    "max_tokens": 4096,
}

# requests applies no timeout by default; bound the wait so a stalled
# connection cannot hang the script indefinitely.
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=120,
)
# Fail with the HTTP error (e.g. an invalid API key) instead of an opaque
# KeyError on the "choices" lookup below.
response.raise_for_status()

# The model's reply is the adapted description used as the image prompt.
new_prompt = response.json()["choices"][0]["message"]["content"]

print(f"Generated Prompt: {new_prompt}")

# Feed the generated prompt to the DALL-E 3 model to create a new image.
response = requests.post(
    "https://api.openai.com/v1/images/generations",
    headers=headers,
    json={
        "model": "dall-e-3",
        "prompt": new_prompt,
        "n": 1,
        "size": "1024x1024",
    },
    # requests applies no timeout by default; bound the wait explicitly.
    timeout=120,
)
# Fail with the HTTP error instead of a KeyError on the "data" lookup below.
response.raise_for_status()

# Decode the JSON body once and read both fields from the single result.
generated = response.json()["data"][0]
image_url = generated["url"]
revised_prompt = generated["revised_prompt"]

print(f"Revised Prompt: {revised_prompt}")

# Download the generated image and save it to the output path.
image_response = requests.get(image_url, timeout=60)
# Abort on a failed download rather than writing an error body to disk as if
# it were the image.
image_response.raise_for_status()
image_data = image_response.content
with open(output_image_path, 'wb') as handler:
    handler.write(image_data)

print(f"Image successfully saved at {output_image_path}")
	#!/usr/bin/env python3
	import base64
	import requests
	import argparse
	import os
	import json

	""" Access to gpt-4-vision and dall-e 3 is necessary """

	# OpenAI API key: taken from the OPENAI_API_KEY environment variable when it
	# is set and non-empty, so the secret does not have to be written into this
	# file; otherwise the placeholder below is used and must be replaced by hand.
	api_key = os.environ.get("OPENAI_API_KEY") or "<OPEN_API_KEY>"

	# Function to encode the image
	def encode_image(image_path):
		"""Read the file at *image_path* and return its contents as base64 text.

		Args:
			image_path: Path of the file to read (opened in binary mode).

		Returns:
			The base64 encoding of the file's bytes, decoded to a ``str``.
		"""
		# The body must be nested under the def and the with statement; the
		# flattened original was not valid Python.
		with open(image_path, "rb") as image_file:
			return base64.b64encode(image_file.read()).decode('utf-8')

	# Function to detect the image type
	def detect_image_type(image_path):
		"""Return the image subtype for *image_path*, inferred from its extension.

		The value is interpolated into a ``data:image/<type>;base64,...`` URL, so
		it is normalised to look like a MIME subtype: the extension is lower-cased
		and the common ``jpg`` spelling is mapped to the registered ``jpeg``.

		Args:
			image_path: Path to the image file; only its name is inspected, the
				file is not opened.

		Returns:
			The lower-cased extension without the leading dot (``"jpg"`` becomes
			``"jpeg"``), or an empty string when the path has no extension.
		"""
		_, file_extension = os.path.splitext(image_path)
		subtype = file_extension[1:].lower()
		# "image/jpg" is not a registered media type; the subtype is "jpeg".
		return "jpeg" if subtype == "jpg" else subtype

	# Command-line interface: two required positional arguments, the input image
	# and the destination of the generated image.
	parser = argparse.ArgumentParser(
		description='Image path and output path.',
	)
	parser.add_argument(
		'image_path',
		help='Path to your image',
	)
	parser.add_argument(
		'output_image_path',
		help='Path for the output image',
	)
	args = parser.parse_args()

	# Expose the parsed values as module-level names used by the rest of the script.
	image_path, output_image_path = args.image_path, args.output_image_path

	# Prompt for the vision model.
	# It asks the model to adapt the template between [begin] and [end] so that it
	# reflects the supplied image; the <... description> markers are the slots the
	# model is expected to fill in. The model's reply is later used as the prompt
	# for the image-generation request.
	prompt = """
	Adapt this text to reflect the image: [begin] A digital 3d pixar character of <age description> <gender description>
	<ethnicity description including skin color> <eyes description> <gaze description> <scene description> <light description> <background description>. The
	style should be reminiscent of 3D pixar poster with attention to texture and lighting, capturing the
	warmth and serenity of the scene. Make the 3d characters whimsical and fun, with joyful faces.[end]
	"""

	# Build the vision request. The image travels inline as a base64 data URL whose
	# subtype is derived from the input file's extension.
	image_type = detect_image_type(image_path)
	base64_image = encode_image(image_path)

	headers = {
		"Content-Type": "application/json",
		"Authorization": f"Bearer {api_key}",
	}

	payload = {
		"model": "gpt-4-vision-preview",
		"messages": [
			{
				"role": "user",
				"content": [
					{"type": "text", "text": prompt},
					{
						"type": "image_url",
						"image_url": {
							"url": f"data:image/{image_type};base64,{base64_image}"
						},
					},
				],
			}
		],
		"max_tokens": 4096,
	}

	# requests applies no timeout by default; bound the wait so a stalled
	# connection cannot hang the script indefinitely.
	response = requests.post(
		"https://api.openai.com/v1/chat/completions",
		headers=headers,
		json=payload,
		timeout=120,
	)
	# Fail with the HTTP error (e.g. an invalid API key) instead of an opaque
	# KeyError on the "choices" lookup below.
	response.raise_for_status()

	# The model's reply is the adapted description used as the image prompt.
	new_prompt = response.json()["choices"][0]["message"]["content"]

	print(f"Generated Prompt: {new_prompt}")

	# Feed the generated prompt to the DALL-E 3 model to create a new image.
	response = requests.post(
		"https://api.openai.com/v1/images/generations",
		headers=headers,
		json={
			"model": "dall-e-3",
			"prompt": new_prompt,
			"n": 1,
			"size": "1024x1024",
		},
		# requests applies no timeout by default; bound the wait explicitly.
		timeout=120,
	)
	# Fail with the HTTP error instead of a KeyError on the "data" lookup below.
	response.raise_for_status()

	# Decode the JSON body once and read both fields from the single result.
	generated = response.json()["data"][0]
	image_url = generated["url"]
	revised_prompt = generated["revised_prompt"]

	print(f"Revised Prompt: {revised_prompt}")

	# Download the generated image and save it to the output path.
	image_response = requests.get(image_url, timeout=60)
	# Abort on a failed download rather than writing an error body to disk as if
	# it were the image.
	image_response.raise_for_status()
	image_data = image_response.content
	# The write must be nested inside the with block; the flattened original was
	# not valid Python.
	with open(output_image_path, 'wb') as handler:
		handler.write(image_data)

	print(f"Image successfully saved at {output_image_path}")