Skip to content

Instantly share code, notes, and snippets.

@kwindla
Created November 11, 2023 20:56
Show Gist options
  • Save kwindla/e79212bf3173e91d906f7443b45163e0 to your computer and use it in GitHub Desktop.
Save kwindla/e79212bf3173e91d906f7443b45163e0 to your computer and use it in GitHub Desktop.
Extract text from a video file using GPT-4V (very simple demo script)
#!/usr/bin/env python3
import click
import imageio
import imageio_ffmpeg
import io
import base64
from PIL import Image
from openai import OpenAI
def _frame_to_b64_jpeg(frame):
    """Encode one decoded video frame as a base64 JPEG string.

    `frame` is an item yielded by the imageio reader; it is passed to
    `Image.fromarray`, saved as JPEG into an in-memory buffer, and the
    resulting bytes are returned base64-encoded as UTF-8 text.
    """
    buf = io.BytesIO()
    Image.fromarray(frame).save(buf, format='JPEG')
    return base64.b64encode(buf.getvalue()).decode("utf-8")


# IntRange(min=1) makes click reject zero/negative rates with a usage
# error instead of letting them reach the stride computation below.
@click.command()
@click.option('--framerate', type=click.IntRange(min=1), default=1,
              help='frames per second to send to GPT-4V')
@click.option('--prompt', type=str, default="""
Extract all text in this image. Output only the text. Do not
provide any description of the image or any other information in
the output.
""",
              help='prompt string to send to GPT-4V with each frame')
@click.argument('video_file')
def main(framerate, prompt, video_file):
    """
    Extract text from (a subset of) video frames.

    Usage:

    OPENAI_API_KEY=<apikey> video2txt.py [--framerate 1] [--prompt "prompt string"] <video file>
    """
    # framerate:  how many frames per second of video to send to the model.
    # prompt:     text sent alongside every selected frame.
    # video_file: path handed to imageio's ffmpeg reader.
    reader = imageio.get_reader(video_file, 'ffmpeg')
    try:
        video_fps = round(reader.get_meta_data()['fps'])
        # Process every nth frame. Clamp to 1: when the requested rate is
        # at least twice the video's own rate the ratio rounds down to 0,
        # and `i % 0` would raise ZeroDivisionError.
        nth_frame = max(1, round(video_fps / framerate))
        for i, frame in enumerate(reader):
            if i % nth_frame != 0:
                continue
            process_frame_with_gpt4v(prompt, _frame_to_b64_jpeg(frame))
    finally:
        # Always release the reader, even if decoding or the API call
        # fails part-way through the video.
        reader.close()
def process_frame_with_gpt4v(prompt, b64jpg, *, model="gpt-4-vision-preview",
                             max_tokens=1000, client=None):
    """Send one JPEG frame plus a text prompt to a vision model and print the reply.

    The request is a single user message with two content parts: the
    prompt text and the image, embedded as a `data:image/jpeg;base64,...`
    URL. The text of the first returned choice is printed to stdout,
    followed by a `----` separator line.

    Args:
        prompt: Instruction text sent with the image.
        b64jpg: Base64-encoded JPEG bytes, as a str.
        model: Model name passed to the chat completions endpoint.
        max_tokens: Upper bound on the length of the reply.
        client: Optional client object exposing
            `chat.completions.create(...)`. When omitted, a new
            `OpenAI()` client is constructed with default settings;
            pass one in to reuse a single client across many frames.

    Returns:
        None. The reply is written to stdout.
    """
    gpt = OpenAI() if client is None else client
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{b64jpg}"
        },
    }
    text_part = {
        "type": "text",
        "text": prompt,
    }
    response = gpt.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [text_part, image_part],
            }
        ],
        max_tokens=max_tokens,
    )
    print(response.choices[0].message.content)
    print("----")
# Script entry point: invoke the click command only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment