Skip to content

Instantly share code, notes, and snippets.

@stevenheidel
Created April 13, 2024 23:05
Show Gist options
  • Save stevenheidel/629e7b020695e28e0bcd110ccf7a3355 to your computer and use it in GitHub Desktop.
Save stevenheidel/629e7b020695e28e0bcd110ccf7a3355 to your computer and use it in GitHub Desktop.
RealWorldQA on gpt-4-turbo
import asyncio
import base64
import json
import os
import sys

import openai
import pandas as pd
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)
from tqdm.asyncio import tqdm
# Async OpenAI client shared by every request; built with default arguments.
client = openai.AsyncClient()
# Dataset root: "annotations.json" and the "images" sub-folder are resolved
# against it. Set REALWORLDQA_DIR to point somewhere else; the original
# hard-coded location remains the default.
folder = os.environ.get("REALWORLDQA_DIR", "/Users/stevenh/Downloads/realworldqa/")
# Upper bound on how many entries are processed concurrently.
parallelism = 50
def encode_image(image_path):
    """Return the file at ``image_path`` encoded as a base64 text string.

    The file is read in binary mode in one go; the base64 bytes are then
    decoded as UTF-8 so the result can be embedded in text.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Retry only failures that can plausibly succeed on a later attempt, back off
# exponentially with jitter between attempts, and give up after a bounded
# number of tries. A bare ``@retry`` would retry forever, without any wait,
# on every exception -- including permanent ones such as bad credentials.
@retry(
    retry=retry_if_exception_type(
        (
            openai.RateLimitError,
            openai.APIConnectionError,
            openai.APITimeoutError,
            openai.InternalServerError,
        )
    ),
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(10),
    reraise=True,
)
async def ask_gpt_with_image(base64_image, question):
    """Send one image plus a text question to gpt-4-turbo and return the reply.

    Args:
        base64_image: Base64-encoded image data. It is embedded in a data URL
            that is always labelled ``image/jpeg``.
        question: The text prompt sent alongside the image.

    Returns:
        The message content of the first choice in the completion response.

    Raises:
        The last transient API error once all attempts are exhausted
        (``reraise=True``); any other exception propagates immediately.
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}",
            "detail": "high",
        },
    }
    text_part = {"type": "text", "text": question}
    response = await client.chat.completions.create(
        model="gpt-4-turbo",
        temperature=0.7,
        messages=[
            {
                "role": "user",
                "content": [image_part, text_part],
            },
        ],
    )
    return response.choices[0].message.content
async def process_entry(entry):
    """Ask the model about one annotation entry and bundle the outcome.

    ``entry`` is indexed with the keys "image", "question" and "answer". The
    image file is looked up under ``<folder>/images/``, base64-encoded, and
    sent together with the question via ``ask_gpt_with_image``.

    Returns:
        A dict with the keys "question", "image", "expected_answer" and
        "gpt_answer".
    """
    # Read every field up front so a missing key fails before any I/O happens.
    image_name, prompt, reference = entry["image"], entry["question"], entry["answer"]
    encoded = encode_image(os.path.join(folder, "images", entry["image"]))
    model_reply = await ask_gpt_with_image(encoded, prompt)
    outcome = {}
    outcome["question"] = prompt
    outcome["image"] = image_name
    outcome["expected_answer"] = reference
    outcome["gpt_answer"] = model_reply
    return outcome
with open(os.path.join(folder, "annotations.json")) as f:
entries = json.load(f)
semaphore = asyncio.Semaphore(parallelism)
tqdm_bar = tqdm(total=len(entries), file=sys.stdout)
async def task(entry):
async with semaphore:
result = await process_entry(entry)
tqdm_bar.update(1)
return result
results = await asyncio.gather(*[task(entry) for entry in entries])
df = pd.DataFrame(results)
# Spelled-out numbers and booleans, and the short form that replaces them.
_ANSWER_SUBSTITUTIONS = {
    "zero": "0", "none": "0",
    "one": "1", "two": "2", "three": "3", "four": "4", "five": "5",
    "six": "6", "seven": "7", "eight": "8", "nine": "9", "ten": "10",
    "true": "yes", "false": "no",
}


def get_formatted_answer(row):
    """Reduce the raw model reply in ``row["gpt_answer"]`` to a short answer.

    Everything from the first "." or ":" onwards is dropped, surrounding
    whitespace is removed, and spelled-out numbers / booleans are replaced
    via a case-insensitive lookup. Replies without a substitution keep their
    original casing.

    Args:
        row: Any mapping-like object supporting ``row["gpt_answer"]``.

    Returns:
        The normalised answer string, or "" when the reply is not a string
        (for example ``None``), so later string comparisons do not raise.
    """
    answer = row["gpt_answer"]
    if not isinstance(answer, str):
        return ""
    # Strip after cutting so that " three. " and "True.\n" still hit the table.
    answer = answer.split(".")[0].split(":")[0].strip()
    return _ANSWER_SUBSTITUTIONS.get(answer.lower(), answer)
# Normalise every model reply, then score it against the expected answer.
df["gpt_formatted_answer"] = df.apply(get_formatted_answer, axis=1)


def _answers_agree(row):
    """Case-insensitive equality of the formatted and the expected answer."""
    predicted = row["gpt_formatted_answer"].lower()
    expected = row["expected_answer"].lower()
    return predicted == expected


df["match"] = df.apply(_answers_agree, axis=1)
# The mean of the boolean column is the fraction of matching answers.
accuracy = df["match"].mean()
print(accuracy)
# Value recorded in the original source: 0.6313725490196078
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment