Skip to content

Instantly share code, notes, and snippets.

@t04glovern
Last active March 17, 2024 11:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save t04glovern/291ed85317e43dce02dce158d9ec4642 to your computer and use it in GitHub Desktop.
Save t04glovern/291ed85317e43dce02dce158d9ec4642 to your computer and use it in GitHub Desktop.
This script captures images from an RTSP stream at regular intervals and uses the Claude 3 Haiku model to generate descriptions based on a custom prompt.
#!/usr/bin/env python3
"""
This script captures images from an RTSP stream at regular intervals and uses
the Claude 3 Haiku model to generate descriptions based on a custom prompt.
Install:
python3 -m venv .venv
source .venv/bin/activate
pip3 install boto3==1.34.62 opencv-python-headless==4.9.0.80
curl https://gist.githubusercontent.com/t04glovern/291ed85317e43dce02dce158d9ec4642/raw \
> rtsp_claude3_haiku_bedrock.py \
&& chmod +x rtsp_claude3_haiku_bedrock.py
Usage:
./rtsp_claude3_haiku_bedrock.py \
--url "rtsp://test:password@192.168.188.60:554//h264Preview_01_sub" \
--prompt "Describe anything interesting in the security camera footage."
"""
import argparse
import base64
import json
import logging
import time
import boto3
import cv2
# AWS region handed to boto3 as region_name when the Bedrock runtime client
# is created.
aws_region: str = "us-east-1"

# Configure the root logger at INFO level so the model's descriptions (logged
# with logging.info) and the error messages are emitted.
logging.basicConfig(level=logging.INFO)
def image_to_base64(image) -> str:
    """
    Converts an image to a base64 encoded JPEG string.

    Parameters:
    image (numpy.ndarray): The image to encode.

    Returns:
    str: The base64 encoded string of the JPEG-compressed image.

    Raises:
    ValueError: If OpenCV reports that the image could not be JPEG-encoded.
    """
    # cv2.imencode returns a (success, buffer) pair. The flag has to be
    # checked: otherwise a failed encode would be base64-encoded and passed
    # on as if it were a valid image.
    encoded_ok, buffer = cv2.imencode('.jpg', image)
    if not encoded_ok:
        raise ValueError("Failed to encode image as JPEG.")
    # Base64 output is pure ASCII, so decoding with that codec is lossless.
    return base64.b64encode(buffer).decode("ascii")
# Bedrock runtime clients keyed by region name, so repeated calls reuse one
# client instead of building a new one for every captured frame.
_bedrock_clients: dict = {}


def _get_bedrock_client(region_name: str):
    """Returns the bedrock-runtime client for region_name, creating it on first use."""
    if region_name not in _bedrock_clients:
        _bedrock_clients[region_name] = boto3.client(
            service_name="bedrock-runtime", region_name=region_name
        )
    return _bedrock_clients[region_name]


def call_claude_haiku(
    base64_string: str,
    custom_prompt: str,
    *,
    model_id: str = "anthropic.claude-3-haiku-20240307-v1:0",
    max_tokens: int = 4096,
) -> str:
    """
    Calls the Claude 3 Haiku model with a base64 encoded image and a custom prompt.

    The request is a single user message containing the image (declared as
    image/jpeg) followed by the prompt text, sent through the Bedrock runtime
    client for the module-level aws_region.

    Parameters:
    base64_string (str): The base64 encoded string of the image.
    custom_prompt (str): The custom prompt to send to Claude 3 Haiku.
    model_id (str): Bedrock model identifier to invoke. Defaults to Claude 3 Haiku.
    max_tokens (int): Value sent as "max_tokens" in the request body.

    Returns:
    str: The response text from the model (all text blocks concatenated).

    Raises:
    ValueError: If the response body contains no text content.

    Errors raised by the Bedrock client call itself are not caught here.
    """
    prompt_config = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": base64_string,
                        },
                    },
                    {
                        "type": "text",
                        "text": custom_prompt,
                    },
                ],
            }
        ],
    }

    client = _get_bedrock_client(aws_region)
    response = client.invoke_model(
        body=json.dumps(prompt_config),
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
    )
    response_body = json.loads(response["body"].read())

    # Collect every block that carries text rather than assuming the first
    # block exists and is text; "or []" covers a missing or null "content".
    text_parts = [
        block["text"]
        for block in response_body.get("content") or []
        if isinstance(block, dict) and isinstance(block.get("text"), str)
    ]
    if not text_parts:
        raise ValueError("Model response did not contain any text content.")
    return "".join(text_parts)
def capture_and_call_claude(
    rtsp_url: str, custom_prompt: str, interval_seconds: float = 60
) -> None:
    """
    Captures images from an RTSP stream at regular intervals and uses Claude 3 Haiku
    to generate descriptions based on a custom prompt.

    Each iteration reads one frame, encodes it, sends it to the model, logs the
    returned description at INFO level, then sleeps. The loop ends when a frame
    cannot be read; the capture is always released on the way out.

    Parameters:
    rtsp_url (str): The RTSP stream URL.
    custom_prompt (str): The custom prompt to send to Claude 3 Haiku.
    interval_seconds (float): Seconds to sleep between captures. Defaults to 60.

    Raises:
    SystemExit: With exit status 1 if the stream cannot be opened.
    """
    cap = cv2.VideoCapture(rtsp_url)
    try:
        if not cap.isOpened():
            logging.error("Could not open the stream.")
            # Raise SystemExit directly instead of calling the site-provided
            # exit(): that helper is meant for the interactive shell and,
            # called with no argument, reports success (status 0) on failure.
            raise SystemExit(1)

        while True:
            ret, frame = cap.read()
            if not ret:
                logging.error("Can't receive frame (stream end?). Exiting ...")
                break

            base64_string = image_to_base64(frame)
            results = call_claude_haiku(base64_string, custom_prompt)
            logging.info(results)

            # NOTE(review): the capture is not read while sleeping; depending
            # on how the backend buffers the stream, the next read() may return
            # a frame queued during the sleep rather than the newest one.
            # Confirm against the camera in use.
            time.sleep(interval_seconds)
    finally:
        # Runs on normal exit, on SystemExit above and on any exception
        # (including Ctrl+C), so the stream handle is never leaked.
        cap.release()
if __name__ == "__main__":
    # Command-line entry point: --url is mandatory, --prompt falls back to a
    # generic security-camera instruction when omitted.
    cli = argparse.ArgumentParser(
        description="Process RTSP stream and analyze with Claude 3 Haiku.",
    )
    cli.add_argument(
        "--url",
        required=True,
        type=str,
        help="RTSP stream URL",
    )
    cli.add_argument(
        "--prompt",
        required=False,
        type=str,
        default="Describe anything interesting in the security camera footage.",
        help="Custom prompt for Claude 3 Haiku",
    )
    options = cli.parse_args()

    # Runs until the stream stops delivering frames or the process is interrupted.
    capture_and_call_claude(rtsp_url=options.url, custom_prompt=options.prompt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment