Created
November 11, 2023 20:56
-
-
Save kwindla/e79212bf3173e91d906f7443b45163e0 to your computer and use it in GitHub Desktop.
Extract text from a video file using GPT-4V (very simple demo script)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import click | |
import imageio | |
import imageio_ffmpeg | |
import io | |
import base64 | |
from PIL import Image | |
from openai import OpenAI | |
# Command-line entry point.
#   framerate  -- how many frames per second of video to send to the model;
#                 must be >= 1 (enforced by click.IntRange).
#   prompt     -- text instruction sent alongside every sampled frame.
#   video_file -- path of the video to read through imageio's ffmpeg plugin.
@click.command()
@click.option('--framerate', type=click.IntRange(min=1), default=1,
              help='frames per second to send to GPT-4V')
@click.option('--prompt', type=str, default="""
Extract all text in this image. Output only the text. Do not
provide any description of the image or any other information in
the output.
""",
              help='prompt string to send to GPT-4V with each frame')
@click.argument('video_file')
def main(framerate, prompt, video_file):
    """
    Extract text from (a subset of) video frames.
    Usage:
    OPENAI_API_KEY=<apikey> video2txt.py [--framerate 1] [--prompt "prompt string"] <video file>
    """
    reader = imageio.get_reader(video_file, 'ffmpeg')
    try:
        video_fps = round(reader.get_meta_data()['fps'])
        # Sample every nth frame. Clamp to 1 so that a requested framerate
        # above the video's own fps (or an fps that rounds to 0) sends every
        # frame instead of dividing by zero in the modulo below.
        nth_frame = max(1, round(video_fps / framerate))
        for i, frame in enumerate(reader):
            if i % nth_frame != 0:
                continue
            # Encode the raw frame array as an in-memory JPEG, then base64 it
            # so it can be embedded in a data: URL.
            buf = io.BytesIO()
            Image.fromarray(frame).save(buf, format='JPEG')
            b64jpg = base64.b64encode(buf.getvalue()).decode("utf-8")
            process_frame_with_gpt4v(prompt, b64jpg)
    finally:
        # Release the reader even if a frame fails to encode or the API
        # call raises.
        reader.close()
def process_frame_with_gpt4v(prompt, b64jpg, *, client=None,
                             model="gpt-4-vision-preview", max_tokens=1000):
    """Send one JPEG frame plus a text prompt to a vision chat model.

    The reply text is printed to stdout followed by a ``----`` separator
    line, and is also returned.

    Args:
        prompt: Instruction text sent as the ``text`` part of the message.
        b64jpg: Base64-encoded JPEG bytes (as ``str``); embedded in a
            ``data:image/jpeg;base64,...`` URL.
        client: Optional client object exposing ``chat.completions.create``.
            When omitted, a new ``OpenAI()`` client is created for this call.
            Passing one in lets callers reuse a single client across frames.
        model: Model name passed to the chat completions endpoint.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    gpt = OpenAI() if client is None else client

    # A single user message carrying two content parts: the prompt text and
    # the frame as an inline data URL.
    user_message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": prompt,
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{b64jpg}",
                },
            },
        ],
    }

    response = gpt.chat.completions.create(
        model=model,
        messages=[user_message],
        max_tokens=max_tokens,
    )

    text = response.choices[0].message.content
    print(text)
    print("----")
    return text
# | |
# run | |
# | |
# Run the click command only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment