Skip to content

Instantly share code, notes, and snippets.

@taesiri
Created May 19, 2024 22:07
Show Gist options
  • Save taesiri/a13232ecd0541cbb5016edbc6a500ca1 to your computer and use it in GitHub Desktop.
Save taesiri/a13232ecd0541cbb5016edbc6a500ca1 to your computer and use it in GitHub Desktop.
get-captions-gpt
import argparse
import base64
import json
import mimetypes
import os
from time import sleep

import openai
from openai import OpenAI
from tqdm import tqdm
# Module-level OpenAI client used by process_image. The API key is read from
# the OPENAI_API_KEY environment variable; the lookup raises KeyError when the
# variable is not set.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
def process_image(image_path):
    """Ask the chat model to describe the image stored at ``image_path``.

    The file is base64-encoded and embedded in the request as a data URL,
    next to a fixed "Describe this picture:" text prompt.

    Args:
        image_path: Path of the image file to send.

    Returns:
        The response object returned by ``client.chat.completions.create``.
    """
    encoded_image = encode_image_to_base64(image_path)

    # Label the data URL with the media type implied by the file name, so a
    # ".png" file is not declared as JPEG. Fall back to the previous
    # hard-coded "image/jpeg" when the type cannot be determined.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    image_url = f"data:{mime_type};base64,{encoded_image}"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this picture:"},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            },
        ],
    )
    return response
def save_result(image_path, result, output_dir):
    """Write ``result`` as JSON to ``<output_dir>/<image stem>_result.json``.

    Args:
        image_path: Path of the processed image; only its base name without
            the extension is used to build the output file name.
        result: Object exposing ``model_dump()``; the returned value is what
            gets serialized.
        output_dir: Directory that receives the JSON file.

    Raises:
        TypeError: If the dumped value is not JSON-serializable. No result
            file is created in that case.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{stem}_result.json")

    # Serialize before touching the filesystem: a serialization error must
    # not leave behind an empty/partial result file, because the presence of
    # the result file is what marks an image as already processed.
    payload = json.dumps(result.model_dump())

    # Write to a temporary sibling and move it into place, so an interrupted
    # write never produces a truncated "*_result.json".
    tmp_path = f"{output_path}.tmp"
    with open(tmp_path, "w") as output_file:
        output_file.write(payload)
    os.replace(tmp_path, output_path)
def main(image_dir, output_dir):
    """Describe every image in ``image_dir`` and store one JSON file per image.

    Images whose ``<stem>_result.json`` already exists in ``output_dir`` are
    skipped, so the script can be re-run to pick up where it left off.

    Args:
        image_dir: Directory scanned (non-recursively) for .png/.jpg/.jpeg
            files.
        output_dir: Directory for the result files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    image_extensions = (".png", ".jpg", ".jpeg")
    # Match extensions case-insensitively so files such as "IMG_1.JPG" are not
    # silently ignored, and sort because os.listdir order is arbitrary.
    images = sorted(
        os.path.join(image_dir, img)
        for img in os.listdir(image_dir)
        if img.lower().endswith(image_extensions)
    )

    for image_path in tqdm(images, desc="Processing images"):
        output_path = os.path.join(
            output_dir,
            f"{os.path.splitext(os.path.basename(image_path))[0]}_result.json",
        )
        if os.path.exists(output_path):
            continue
        try:
            result = process_image(image_path)
            save_result(image_path, result, output_dir)
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            # The failed image is not retried in this run; pause before the
            # next request to avoid potential rate limit issues. Re-running
            # the script picks the image up again since it has no result file.
            sleep(5)
if __name__ == "__main__":
    # Command-line entry point: two positional arguments, input and output
    # directories, passed straight through to main().
    arg_parser = argparse.ArgumentParser(
        description="Process images using GPT-4 Vision API.",
    )
    arg_parser.add_argument(
        "image_dir",
        type=str,
        help="The directory containing images to process.",
    )
    arg_parser.add_argument(
        "output_dir",
        type=str,
        help="The directory to save the output results.",
    )
    cli_args = arg_parser.parse_args()
    main(cli_args.image_dir, cli_args.output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment