# image upload and detection flask app
# Gist: jeffehobbs/db292a3dc5a5577469b0cd18ff300825 (created June 19, 2024 02:04)
# owl.py | jhobbs@advance.net
#
# GOAL: upload a PDF, convert it to an image, and run OpenAI vision on the image
#
# TO DO:
# 1. [X] upload PDF or image
# 2. [X] change PDF to image
# 3. [X] upload default prompt
# 4. [X] perform vision API call
# 5. [X] send response back to client
# 6. [X] fix HEIC file handling
#
# Note to self: install this layer when moving to AWS:
# https://github.com/jeylabs/aws-lambda-poppler-layer/releases
import base64
import configparser
import mimetypes
import os

import requests
from flask import Flask, Response, request, json, render_template, jsonify
from pdf2image import convert_from_path
from PIL import Image
from pillow_heif import register_heif_opener
# globals
# Directory that uploads and their converted images are written to.
UPLOAD_PATH = '/tmp/'
# Absolute directory containing this script; secrets.txt is looked up here.
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

# Read the OpenAI API key from the [openai] section of secrets.txt.
config = configparser.ConfigParser()
config.read(f"{SCRIPT_PATH}/secrets.txt")
OPENAI_API_KEY = config.get('openai', 'apikey')

app = Flask(__name__)
@app.route('/')
def main():
    """Render and return the ``index.html`` template for the site root."""
    page = render_template("index.html")
    return page
@app.route('/description', methods=['POST'])
def success():
    """Handle an upload: save it, normalise it to an image, and describe it.

    Reads the uploaded ``file`` and the ``prompt`` / ``max_tokens`` form
    fields, stores the upload under ``UPLOAD_PATH``, converts PDF and HEIC
    uploads via ``convert_pdf`` / ``convert_heic``, passes the first resulting
    image to ``describe_image`` and renders the answer with ``response.html``.

    Returns:
        The rendered ``index.html`` when no file was selected, the rendered
        ``response.html`` on success, or a plain-text message with status
        400 / 502 when the input or the upstream reply is unusable.
    """
    f = request.files['file']
    prompt = request.form['prompt']
    try:
        max_tokens = int(request.form['max_tokens'])
    except ValueError:
        return "max_tokens must be an integer", 400

    # The filename is client-controlled: keep only its last path component
    # (treating backslashes as separators too) so that a name such as
    # "../../etc/cron.d/x" cannot be written outside UPLOAD_PATH.
    filename = os.path.basename((f.filename or '').replace('\\', '/'))
    if not filename or filename in ('.', '..'):
        # Nothing was selected in the form; show the upload page again.
        return render_template("index.html")

    image_path = os.path.join(UPLOAD_PATH, filename)
    f.save(image_path)

    # Dispatch on the real, case-insensitive extension rather than on a
    # substring of the whole path ("IMG_1.HEIC", "pdf_notes.png", ...).
    extension = os.path.splitext(filename)[1].lower()
    if extension == '.pdf':
        image_set = convert_pdf(image_path)
    elif extension == '.heic':
        image_set = convert_heic(image_path)
    else:
        image_set = [image_path]

    if not image_set:
        return "No image could be extracted from the upload", 400

    # Only the first image (e.g. the first PDF page) is described.
    description = describe_image(image_set[0], prompt, max_tokens)
    try:
        response = description['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        # The API answered with something other than a completion
        # (typically an error payload).
        return "The vision API did not return a description", 502
    return render_template("response.html", response=response)
def convert_pdf(pdf_path):
    """Render each page of a PDF to its own JPEG file next to the source.

    Page ``i`` of ``<root>.pdf`` is saved as ``<root>_<i>.jpg``.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        List of the JPEG paths written, in page order.
    """
    # Strip only the trailing extension. A plain str.replace('.pdf', ...)
    # would rewrite every occurrence in the path and would be a no-op for
    # ".PDF", causing each page to overwrite the source file.
    root, _extension = os.path.splitext(pdf_path)
    image_set = []
    for index, page in enumerate(convert_from_path(pdf_path)):
        image_path = f"{root}_{index}.jpg"
        page.save(image_path, 'JPEG')
        image_set.append(image_path)
    return image_set
def convert_heic(heic_path):
    """Convert a HEIC image to an RGB JPEG stored next to the source.

    Args:
        heic_path: Path of the HEIC file to convert.

    Returns:
        A one-element list containing the path of the JPEG written.
    """
    register_heif_opener()
    # Swap only the trailing extension. Replacing the substring 'heic' would
    # also rewrite directory or file names containing it and would do nothing
    # for an upper-case ".HEIC" extension.
    root, _extension = os.path.splitext(heic_path)
    filename = root + '.jpg'
    # Context manager closes the underlying file handle once converted.
    with Image.open(heic_path) as image:
        image.convert('RGB').save(filename, 'JPEG')
    return [filename]
def return_apikey(section, key):
    """Return the value of ``key`` in ``section`` of the secrets.txt beside this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = configparser.ConfigParser()
    parser.read(script_dir + '/secrets.txt')
    return parser.get(section, key)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def describe_image(image_path, prompt, max_tokens, timeout=60):
    """Send one image and a text prompt to the OpenAI chat completions API.

    The image is embedded in the request as a base64 ``data:`` URL.

    Args:
        image_path: Path of the image file to send.
        prompt: Text instruction sent alongside the image.
        max_tokens: Value for the request's ``max_tokens`` field.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error, so a stalled call cannot hang forever.

    Returns:
        The decoded JSON body of the API response.
    """
    # TODO: extend this to send every image in a set, not just a single path.
    base64_image = encode_image(image_path)

    # Label the data URL with the file's actual image type (PNG, GIF, ...),
    # falling back to JPEG when the extension is unknown.
    mime_type = mimetypes.guess_type(image_path)[0]
    if not mime_type or not mime_type.startswith('image/'):
        mime_type = 'image/jpeg'

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": "auto",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Decode the body once and reuse it for both the debug dump and the return.
    result = response.json()
    print(json.dumps(result, indent=4))
    return result
# main function
if __name__ == '__main__':
    # The server listens on every interface, and the interactive debugger
    # lets anyone who can reach it run arbitrary Python. Debug mode is
    # therefore off unless explicitly requested with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
#fin
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment