Skip to content

Instantly share code, notes, and snippets.

@AwaisKamran
Created May 15, 2024 21:09
Show Gist options
  • Save AwaisKamran/f1e2c76a4b9ea3740632d85a79bb0739 to your computer and use it in GitHub Desktop.
Save AwaisKamran/f1e2c76a4b9ea3740632d85a79bb0739 to your computer and use it in GitHub Desktop.
This script lets you process videos using OpenAI's GPT-4o model.
from dotenv import load_dotenv
from openai import OpenAI
import cv2
import base64
import os
# Run python-dotenv's loader first so that the environment lookup below
# happens after it has had a chance to populate the variable.
load_dotenv()

# The API key is read from the OPENAI_API_KEY environment variable; `key`
# is None when the variable is not set.
key = os.environ.get("OPENAI_API_KEY")

# Client used later in the script to send the chat completion request.
client = OpenAI(api_key=key)
# Path of the video to describe; replace the placeholder before running.
VIDEO_PATH = "PATH TO YOUR VIDEO"

video = cv2.VideoCapture(VIDEO_PATH)

# Fail fast when the capture could not be opened: otherwise the loop below
# would be skipped silently and the script would carry on with zero frames.
if not video.isOpened():
    raise SystemExit(f"Could not open video: {VIDEO_PATH!r}")

# Extract frames from video
# Each decoded frame is JPEG-encoded and stored as a base64 text string.
# Every frame is kept here; the prompt-building step samples from this list.
base64Frames = []
try:
    while video.isOpened():
        success, frame = video.read()
        if not success:
            # read() reports failure once no further frame can be decoded.
            break
        encoded_ok, buffer = cv2.imencode(".jpg", frame)
        if not encoded_ok:
            # Skip a frame that could not be JPEG-encoded rather than
            # storing an unusable payload.
            continue
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if reading or encoding raised.
    video.release()

print(len(base64Frames), "frames read.\n")
# Create chat prompt
# Only every 30th stored frame (starting with the first) is attached to the
# request; the remaining frames are not sent.
sampled_frames = base64Frames[::30]

# NOTE(review): this {"image", "resize"} part shape differs from the
# documented `image_url` content part -- confirm the API still accepts it.
frame_parts = [{"image": encoded, "resize": 768} for encoded in sampled_frames]

instruction = "These are frames from a video that I want to depict. Explain what is in the video in a summary paragraph."

# A single user message: the text instruction followed by the sampled frames.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [instruction, *frame_parts],
    },
]

# Request settings: target model, the prompt above, and a 300-token cap on
# the generated reply.
params = dict(
    model="gpt-4o",
    messages=PROMPT_MESSAGES,
    max_tokens=300,
)

result = client.chat.completions.create(**params)

# Print the text of the first returned choice.
print(result.choices[0].message.content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment