-
-
Save ali0une/f9a96f6707a743de50361dd332597e14 to your computer and use it in GitHub Desktop.
Text Summaries of images with Python and LLaVa AI model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## references
## https://plainenglish.io/community/generate-a-summary-of-an-image-with-an-llm-in-python-0fc069
## https://huggingface.co/mys/ggml_tree/main
## NOTE(review): the huggingface link above looks truncated -- presumably
## mys/ggml_llava-v1.5-7b (matches the ggml-model/mmproj filenames); confirm.
"""Generate text summaries of images with the llava-cli binary.

For every *.jpg / *.png under data/img, run llava-cli once per image and
write the cleaned model output to a matching data/txt/<name>.txt.
Images whose .txt file already exists are skipped.
"""
from pathlib import Path
import subprocess

LLAVA_EXEC_PATH = "./bin/llava-cli"
MODEL_PATH = "./models/ggml-model-f16.gguf"
MMPROJ_PATH = "./models/mmproj-model-f16.gguf"
DATA_DIR = "data"
IMAGE_DIR = Path(DATA_DIR, "img")
TXT_DIR = Path(DATA_DIR, "txt")

IMAGE_PATTERNS = ("*.jpg", "*.png")  # image file types to process
TEMP = 0.1
## prompt for llava 1.5
PROMPT = "You are an assistant who perfectly describes images."


def collect_image_paths():
    """Return a list of image Paths under IMAGE_DIR, sorted per pattern.

    Mirrors the original glob loop: jpg results (sorted) first, then png
    results (sorted).
    """
    paths = []
    for pattern in IMAGE_PATTERNS:
        paths.extend(sorted(IMAGE_DIR.glob(pattern)))
    return paths


def clean_summary_text(text):
    """Replicate the original sed cleanup in pure Python.

    Drops lines containing an underscore (sed '/_/d') and
    whitespace-only lines (sed '/^[[:space:]]*$/d').
    """
    kept = [line for line in text.splitlines() if "_" not in line and line.strip()]
    return "\n".join(kept) + ("\n" if kept else "")


def summarize_image(image_path, summary_path):
    """Run llava-cli on image_path; write cleaned stdout to summary_path.

    Returns the llava-cli return code (0 on success). On failure nothing
    is written, so the image is retried on the next run (the original's
    shell redirection left an empty .txt behind even when llava-cli
    failed, permanently marking the image "already processed").
    """
    # Argument list with shell=False: paths containing spaces or quotes
    # no longer break the command (the original interpolated everything
    # into a shell string with shell=True).
    cmd = [
        LLAVA_EXEC_PATH,
        "-m", MODEL_PATH,
        "--mmproj", MMPROJ_PATH,
        "--temp", str(TEMP),
        "-p", PROMPT,
        "--ctx-size", "0",
        "--image", str(image_path),
    ]
    print(" ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"llava-cli failed ({result.returncode}): {result.stderr.strip()}")
        return result.returncode
    summary_path.write_text(clean_summary_text(result.stdout), encoding="utf-8")
    return 0


def main():
    """Process every image that does not yet have a summary .txt file."""
    # Ensure the output directory exists (the original assumed it did).
    TXT_DIR.mkdir(parents=True, exist_ok=True)
    for image_path in collect_image_paths():
        summary_path = TXT_DIR / (image_path.stem + ".txt")
        if summary_path.exists():
            print(f"Already processed {summary_path}")
            continue
        print(f"Processing {image_path}")
        if summarize_image(image_path, summary_path) == 0:
            print("Done")
            print("txt files cleaned")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment