# jeffehobbs/owl.py

## owl.py
# owl.py | jhobbs@advance.net
#
# GOAL: upload a PDF or image, convert it to a JPEG when needed, and have the OpenAI vision API describe it
#
# TO DO:
# 1. [X] upload PDF or image
# 2. [X] change PDF to image
# 3. [X] upload default prompt
# 4. [X] perform vision API call
# 5. [X] splat response back to client
# 6. [X] fix HEIC file handling
#
# note to self, install this layer when moving to AWS:
# https://github.com/jeylabs/aws-lambda-poppler-layer/releases

import os, configparser, base64, requests
from flask import Flask, Response, request, json, render_template, jsonify
from pdf2image import convert_from_path
from PIL import Image
from pillow_heif import register_heif_opener

# globals

# The Flask application that the route decorators below attach to.
app = Flask(__name__)

# Uploaded files are saved into this directory.
UPLOAD_PATH = '/tmp/'

# Directory holding this script; secrets.txt is read from the same place.
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

# The OpenAI key lives in the [openai] section of secrets.txt as "apikey".
config = configparser.ConfigParser()
config.read(SCRIPT_PATH +'/secrets.txt')
OPENAI_API_KEY = config.get('openai', 'apikey')

@app.route('/')
def main():
    """Render and return the ``index.html`` template for the site root."""
    landing_page = render_template("index.html")
    return landing_page

@app.route('/description', methods = ['POST'])
def success():
    """Handle an upload and render the model's description of it.

    Expects a multipart POST carrying a ``file`` upload plus ``prompt`` and
    ``max_tokens`` form fields.  The upload is stored under ``UPLOAD_PATH``;
    PDF and HEIC uploads are converted to JPEG first.  Only the first
    resulting image is passed to ``describe_image``.

    Returns:
        The rendered ``response.html`` template holding the description;
        the rendered ``index.html`` template when no file was chosen; or a
        plain-text message with a 400/502 status when the input or the API
        reply cannot be used.
    """
    f = request.files['file']
    prompt = request.form['prompt']

    try:
        max_tokens = int(request.form['max_tokens'])
    except ValueError:
        return "max_tokens must be an integer", 400

    # The client controls f.filename: keep only its final path component so
    # names such as "../../etc/cron.d/x" cannot escape UPLOAD_PATH.
    filename = os.path.basename((f.filename or '').replace('\\', '/'))
    if filename in ('', '.', '..'):
        return render_template("index.html")

    # Store the file with a lower-cased extension so the type is decided by
    # the real extension (not by "pdf"/"heic" appearing anywhere in the name)
    # and so that ".PDF" / ".HEIC" uploads are converted as well.
    root, ext = os.path.splitext(filename)
    ext = ext.lower()
    image_path = os.path.join(UPLOAD_PATH, root + ext)
    f.save(image_path)

    if ext == '.pdf':
        image_set = convert_pdf(image_path)
    elif ext == '.heic':
        image_set = convert_heic(image_path)
    else:
        image_set = [image_path]

    # A PDF that yields no pages produces an empty list.
    if not image_set:
        return "The uploaded file did not contain any pages", 400

    description = describe_image(image_set[0], prompt, max_tokens)
    try:
        response = description['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        # No usable "choices" entry: surface the API's own error message when
        # the reply carries one instead of failing with an unhandled KeyError.
        error = description.get('error') if isinstance(description, dict) else None
        message = error.get('message') if isinstance(error, dict) else None
        return message or "The vision API returned an unexpected response", 502

    return render_template("response.html", response=response)

def convert_pdf(pdf_path):
    """Render every page of a PDF to its own JPEG file.

    Page ``i`` (zero-based) is written next to the PDF as
    ``<pdf path without its extension>_<i>.jpg``.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        List of the JPEG paths written, in page order.
    """
    # Build output names from the path stem instead of str.replace('.pdf'):
    # replace() is case-sensitive, rewrites every occurrence in the path, and
    # leaves the name unchanged (so the JPEG overwrites the PDF itself) when
    # the extension is missing or upper-case.
    stem = os.path.splitext(pdf_path)[0]
    image_set = []
    for page_number, page in enumerate(convert_from_path(pdf_path)):
        image_path = f"{stem}_{page_number}.jpg"
        page.save(image_path, 'JPEG')
        image_set.append(image_path)
    return image_set

def convert_heic(heic_path):
    """Convert a HEIC image to an RGB JPEG saved alongside it.

    Args:
        heic_path: Path of the HEIC file to convert.

    Returns:
        A one-element list holding the path of the JPEG that was written
        (a list, like the value returned by ``convert_pdf``).
    """
    register_heif_opener()  # registers HEIF support with Pillow before opening
    # Swap only the extension: str.replace('heic', 'jpg') also rewrote any
    # "heic" elsewhere in the path and left upper-case ".HEIC" untouched.
    filename = os.path.splitext(heic_path)[0] + '.jpg'
    # The context manager closes the source file once the JPEG is written.
    with Image.open(heic_path) as image:
        image.convert('RGB').save(filename)
    return [filename]

def return_apikey(section, key):
    """Look up one value in the ``secrets.txt`` file next to this script.

    Args:
        section: Name of the INI section to read from.
        key: Option name within that section.

    Returns:
        The option's value as a string.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    parser = configparser.ConfigParser()
    parser.read(here +'/secrets.txt')
    return parser.get(section, key)

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def describe_image(image_path, prompt, max_tokens):
    """Ask the OpenAI chat-completions API to respond to an image.

    The image is sent inline as a base64 ``data:`` URL together with the
    text prompt, in a single user message to the ``gpt-4o`` model.

    Args:
        image_path: Path of the image file to send.
        prompt: Text instruction that accompanies the image.
        max_tokens: Value for the request's ``max_tokens`` field.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    import mimetypes  # local import: only this function needs it

    # TODO: only one image is sent per call; extend to a whole image set.

    base64_image = encode_image(image_path)

    # Label the data URL with the file's actual image type (PNG, GIF, ...)
    # rather than always claiming JPEG; fall back to JPEG when the type
    # cannot be guessed from the file name.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith('image/'):
        mime_type = 'image/jpeg'

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": "auto",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    # requests has no default timeout; without one a stalled connection
    # would block the web worker forever.  (connect, read) in seconds.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=(10, 120),
    )
    # Decode the body once and reuse it for both the debug dump and return.
    result = response.json()
    print(json.dumps(result, indent=4))
    return result

# main function
if __name__ == '__main__':
    # The server listens on every interface, and Flask's debug mode enables
    # an interactive debugger that can execute arbitrary code for anyone who
    # can reach the port.  Debug mode is therefore opt-in: start the script
    # with OWL_DEBUG=1 to turn it on.
    debug_enabled = os.environ.get('OWL_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)

#fin
	# owl.py | jhobbs@advance.net
	#
	# GOAL: upload a PDF or image, convert it to a JPEG when needed, and have the OpenAI vision API describe it
	#
	# TO DO:
	# 1. [X] upload PDF or image
	# 2. [X] change PDF to image
	# 3. [X] upload default prompt
	# 4. [X] perform vision API call
	# 5. [X] splat response back to client
	# 6. [X] fix HEIC file handling
	#
	# note to self, install this layer when moving to AWS:
	# https://github.com/jeylabs/aws-lambda-poppler-layer/releases

	import os, configparser, base64, requests
	from flask import Flask, Response, request, json, render_template, jsonify
	from pdf2image import convert_from_path
	from PIL import Image
	from pillow_heif import register_heif_opener

	# globals

	# The Flask application that the route decorators below attach to.
	app = Flask(__name__)

	# Uploaded files are saved into this directory.
	UPLOAD_PATH = '/tmp/'

	# Directory holding this script; secrets.txt is read from the same place.
	SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

	# The OpenAI key lives in the [openai] section of secrets.txt as "apikey".
	config = configparser.ConfigParser()
	config.read(SCRIPT_PATH +'/secrets.txt')
	OPENAI_API_KEY = config.get('openai', 'apikey')

	@app.route('/')
	def main():
	    """Render and return the ``index.html`` template for the site root."""
	    # The body was flush with its ``def`` line; it is re-indented here so
	    # the function parses.
	    return render_template("index.html")

	@app.route('/description', methods = ['POST'])
	def success():
	    """Handle an upload and render the model's description of it.

	    Expects a multipart POST carrying a ``file`` upload plus ``prompt`` and
	    ``max_tokens`` form fields.  The upload is stored under ``UPLOAD_PATH``;
	    PDF and HEIC uploads are converted to JPEG first.  Only the first
	    resulting image is passed to ``describe_image``.

	    Returns:
	        The rendered ``response.html`` template holding the description;
	        the rendered ``index.html`` template when no file was chosen; or a
	        plain-text message with a 400/502 status when the input or the API
	        reply cannot be used.
	    """
	    f = request.files['file']
	    prompt = request.form['prompt']

	    try:
	        max_tokens = int(request.form['max_tokens'])
	    except ValueError:
	        return "max_tokens must be an integer", 400

	    # The client controls f.filename: keep only its final path component so
	    # names such as "../../etc/cron.d/x" cannot escape UPLOAD_PATH.
	    filename = os.path.basename((f.filename or '').replace('\\', '/'))
	    if filename in ('', '.', '..'):
	        return render_template("index.html")

	    # Store the file with a lower-cased extension so the type is decided by
	    # the real extension (not by "pdf"/"heic" appearing anywhere in the name)
	    # and so that ".PDF" / ".HEIC" uploads are converted as well.
	    root, ext = os.path.splitext(filename)
	    ext = ext.lower()
	    image_path = os.path.join(UPLOAD_PATH, root + ext)
	    f.save(image_path)

	    if ext == '.pdf':
	        image_set = convert_pdf(image_path)
	    elif ext == '.heic':
	        image_set = convert_heic(image_path)
	    else:
	        image_set = [image_path]

	    # A PDF that yields no pages produces an empty list.
	    if not image_set:
	        return "The uploaded file did not contain any pages", 400

	    description = describe_image(image_set[0], prompt, max_tokens)
	    try:
	        response = description['choices'][0]['message']['content']
	    except (KeyError, IndexError, TypeError):
	        # No usable "choices" entry: surface the API's own error message when
	        # the reply carries one instead of failing with an unhandled KeyError.
	        error = description.get('error') if isinstance(description, dict) else None
	        message = error.get('message') if isinstance(error, dict) else None
	        return message or "The vision API returned an unexpected response", 502

	    return render_template("response.html", response=response)

	def convert_pdf(pdf_path):
	    """Render every page of a PDF to its own JPEG file.

	    Page ``i`` (zero-based) is written next to the PDF as
	    ``<pdf path without its extension>_<i>.jpg``.

	    Args:
	        pdf_path: Path of the PDF file to convert.

	    Returns:
	        List of the JPEG paths written, in page order.
	    """
	    # Build output names from the path stem instead of str.replace('.pdf'):
	    # replace() is case-sensitive, rewrites every occurrence in the path, and
	    # leaves the name unchanged (so the JPEG overwrites the PDF itself) when
	    # the extension is missing or upper-case.
	    stem = os.path.splitext(pdf_path)[0]
	    image_set = []
	    for page_number, page in enumerate(convert_from_path(pdf_path)):
	        image_path = f"{stem}_{page_number}.jpg"
	        page.save(image_path, 'JPEG')
	        image_set.append(image_path)
	    return image_set

	def convert_heic(heic_path):
	    """Convert a HEIC image to an RGB JPEG saved alongside it.

	    Args:
	        heic_path: Path of the HEIC file to convert.

	    Returns:
	        A one-element list holding the path of the JPEG that was written
	        (a list, like the value returned by ``convert_pdf``).
	    """
	    register_heif_opener()  # registers HEIF support with Pillow before opening
	    # Swap only the extension: str.replace('heic', 'jpg') also rewrote any
	    # "heic" elsewhere in the path and left upper-case ".HEIC" untouched.
	    filename = os.path.splitext(heic_path)[0] + '.jpg'
	    # The context manager closes the source file once the JPEG is written.
	    with Image.open(heic_path) as image:
	        image.convert('RGB').save(filename)
	    return [filename]

	def return_apikey(section, key):
	    """Look up one value in the ``secrets.txt`` file next to this script.

	    Args:
	        section: Name of the INI section to read from.
	        key: Option name within that section.

	    Returns:
	        The option's value as a string.
	    """
	    # The body was flush with its ``def`` line; it is re-indented here so
	    # the function parses.
	    here = os.path.dirname(os.path.abspath(__file__))
	    parser = configparser.ConfigParser()
	    parser.read(here +'/secrets.txt')
	    return parser.get(section, key)

	def encode_image(image_path):
	    """Return the contents of the file at ``image_path`` as base64 text.

	    Args:
	        image_path: Path of the file to read (opened in binary mode).

	    Returns:
	        The file's bytes, base64-encoded and decoded to a ``str``.
	    """
	    # The ``with`` body was flush with its header; nesting is restored here.
	    with open(image_path, "rb") as image_file:
	        return base64.b64encode(image_file.read()).decode('utf-8')

	def describe_image(image_path, prompt, max_tokens):
	    """Ask the OpenAI chat-completions API to respond to an image.

	    The image is sent inline as a base64 ``data:`` URL together with the
	    text prompt, in a single user message to the ``gpt-4o`` model.

	    Args:
	        image_path: Path of the image file to send.
	        prompt: Text instruction that accompanies the image.
	        max_tokens: Value for the request's ``max_tokens`` field.

	    Returns:
	        The decoded JSON body of the API response.

	    Raises:
	        requests.exceptions.RequestException: If the request fails or
	            times out.
	    """
	    import mimetypes  # local import: only this function needs it

	    # TODO: only one image is sent per call; extend to a whole image set.

	    base64_image = encode_image(image_path)

	    # Label the data URL with the file's actual image type (PNG, GIF, ...)
	    # rather than always claiming JPEG; fall back to JPEG when the type
	    # cannot be guessed from the file name.
	    mime_type, _ = mimetypes.guess_type(image_path)
	    if not mime_type or not mime_type.startswith('image/'):
	        mime_type = 'image/jpeg'

	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {OPENAI_API_KEY}",
	    }

	    payload = {
	        "model": "gpt-4o",
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {
	                        "type": "text",
	                        "text": prompt,
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            "url": f"data:{mime_type};base64,{base64_image}",
	                            "detail": "auto",
	                        },
	                    },
	                ],
	            }
	        ],
	        "max_tokens": max_tokens,
	    }

	    # requests has no default timeout; without one a stalled connection
	    # would block the web worker forever.  (connect, read) in seconds.
	    response = requests.post(
	        "https://api.openai.com/v1/chat/completions",
	        headers=headers,
	        json=payload,
	        timeout=(10, 120),
	    )
	    # Decode the body once and reuse it for both the debug dump and return.
	    result = response.json()
	    print(json.dumps(result, indent=4))
	    return result

	# main function
	if __name__ == '__main__':
	    # The server listens on every interface, and Flask's debug mode enables
	    # an interactive debugger that can execute arbitrary code for anyone who
	    # can reach the port.  Debug mode is therefore opt-in: start the script
	    # with OWL_DEBUG=1 to turn it on.
	    debug_enabled = os.environ.get('OWL_DEBUG', '').lower() in ('1', 'true', 'yes')
	    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)

	#fin