Skip to content

Instantly share code, notes, and snippets.

@ali0une
Last active May 8, 2024 16:52
Show Gist options
  • Save ali0une/f9a96f6707a743de50361dd332597e14 to your computer and use it in GitHub Desktop.
Save ali0une/f9a96f6707a743de50361dd332597e14 to your computer and use it in GitHub Desktop.
Text summaries of images with Python and the LLaVA AI model
## references
## https://plainenglish.io/community/generate-a-summary-of-an-image-with-an-llm-in-python-0fc069
## https://huggingface.co/mys/ggml_tree/main
import glob
import os
import shlex
import subprocess
from pathlib import Path
# Generate a text summary for every image in data/img with the llava-cli
# binary and store it as data/txt/<image stem>.txt.  Images that already
# have a summary file are skipped, so the script can be re-run safely.

# Locations of the llava-cli executable and the model files it loads.
LLAVA_EXEC_PATH = "./bin/llava-cli"
MODEL_PATH = "./models/ggml-model-f16.gguf"
MMPROJ_PATH = "./models/mmproj-model-f16.gguf"

# Input images are read from DATA_DIR/img, summaries are written to DATA_DIR/txt.
DATA_DIR = "data"
IMAGE_DIR = Path(DATA_DIR, "img")
TXT_DIR = Path(DATA_DIR, "txt")

# Glob patterns of the image files to summarise.
types = ('*.jpg', '*.png')
image_paths = []
for pattern in types:
    # Sorted per pattern: all .jpg files come first, then all .png files.
    image_paths.extend(sorted(glob.glob(str(IMAGE_DIR.joinpath(pattern)))))

# Summaries that already exist (informational; the loop below checks each
# target file individually).
txt_paths = sorted(glob.glob(str(TXT_DIR.joinpath("*.txt"))))

TEMP = 0.1
## for llava 1.5
PROMPT = "You are an assistant who perfectly describes images."

# The command is kept as an argument list so it can be executed without a
# shell: prompts and file names containing quotes or spaces are then passed
# through verbatim instead of being re-parsed by /bin/sh.
llava_args = [
    LLAVA_EXEC_PATH,
    "-m", MODEL_PATH,
    "--mmproj", MMPROJ_PATH,
    "--temp", str(TEMP),
    "-p", PROMPT,
    "--ctx-size", "0",
]

# Shell-quoted rendering of llava_args, for display only.  With the values
# above it reads:
# ./bin/llava-cli -m ./models/ggml-model-f16.gguf --mmproj ./models/mmproj-model-f16.gguf --temp 0.1 -p 'You are an assistant who perfectly describes images.' --ctx-size 0
bash_command = " ".join(shlex.quote(arg) for arg in llava_args)


def clean_summary_text(text):
    """Return *text* without log lines and blank lines.

    Mirrors the two sed passes the script used to run on the output file:
    every line containing an underscore is dropped (this is how the
    llava-cli log lines are filtered out, at the cost of also dropping any
    description line that happens to contain "_"), then every empty or
    whitespace-only line is dropped.  Each kept line is newline-terminated.
    """
    kept = [
        line
        for line in text.split("\n")
        if "_" not in line and line.strip()
    ]
    return "".join(f"{line}\n" for line in kept)


def summarize_image(image_path, image_summary_path):
    """Run llava-cli on *image_path* and write the cleaned summary.

    The summary file is written only when llava-cli exits with status 0, so
    a failed run never leaves behind an empty or partial file that a later
    run would mistake for "already processed".

    Returns True when the summary was written, False otherwise.
    """
    image_summary_path = Path(image_summary_path)
    command = [*llava_args, "--image", str(image_path)]
    print(
        " ".join(shlex.quote(arg) for arg in command)
        + " > "
        + shlex.quote(str(image_summary_path))
    )

    try:
        process = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Executable missing or not runnable: report it and move on.
        print(f"Could not run {LLAVA_EXEC_PATH}: {exc}")
        return False

    if process.returncode != 0:
        print(f"llava-cli failed with return code {process.returncode}")
        print(process.stderr.decode("utf-8", errors="replace"))
        return False
    print("Done")

    summary = clean_summary_text(
        process.stdout.decode("utf-8", errors="replace")
    )
    # Create data/txt on demand so a fresh checkout works out of the box.
    image_summary_path.parent.mkdir(parents=True, exist_ok=True)
    image_summary_path.write_text(summary, encoding="utf-8")
    print("txt files cleaned")
    return True


for image_path in image_paths:
    image_summary_path = TXT_DIR.joinpath(Path(image_path).stem + ".txt")
    if image_summary_path.exists():
        print(f"Already processed {image_summary_path}")
        continue
    print(f"Processing {image_path}")
    summarize_image(image_path, image_summary_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment