-
-
Save ali0une/f9a96f6707a743de50361dd332597e14 to your computer and use it in GitHub Desktop.
Text Summaries of images with Python and LLaVa AI model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## references
## https://plainenglish.io/community/generate-a-summary-of-an-image-with-an-llm-in-python-0fc069
## https://huggingface.co/mys/ggml_tree/main
## NOTE(review): the huggingface link above looks truncated -- presumably
## mys/ggml_llava-v1.5-7b (matches the ggml-model/mmproj filenames); confirm.
"""Generate text summaries of images with the llava-cli binary.

For every *.jpg / *.png under data/img, run llava-cli once per image and
write the cleaned model output to a matching data/txt/<name>.txt.
Images whose .txt file already exists are skipped.
"""
from pathlib import Path
import subprocess

LLAVA_EXEC_PATH = "./bin/llava-cli"
MODEL_PATH = "./models/ggml-model-f16.gguf"
MMPROJ_PATH = "./models/mmproj-model-f16.gguf"
DATA_DIR = "data"
IMAGE_DIR = Path(DATA_DIR, "img")
TXT_DIR = Path(DATA_DIR, "txt")

IMAGE_PATTERNS = ("*.jpg", "*.png")  # image file types to process
TEMP = 0.1
## prompt for llava 1.5
PROMPT = "You are an assistant who perfectly describes images."


def collect_image_paths():
    """Return a list of image Paths under IMAGE_DIR, sorted per pattern.

    Mirrors the original glob loop: jpg results (sorted) first, then png
    results (sorted).
    """
    paths = []
    for pattern in IMAGE_PATTERNS:
        paths.extend(sorted(IMAGE_DIR.glob(pattern)))
    return paths


def clean_summary_text(text):
    """Replicate the original sed cleanup in pure Python.

    Drops lines containing an underscore (sed '/_/d') and
    whitespace-only lines (sed '/^[[:space:]]*$/d').
    """
    kept = [line for line in text.splitlines() if "_" not in line and line.strip()]
    return "\n".join(kept) + ("\n" if kept else "")


def summarize_image(image_path, summary_path):
    """Run llava-cli on image_path; write cleaned stdout to summary_path.

    Returns the llava-cli return code (0 on success). On failure nothing
    is written, so the image is retried on the next run (the original's
    shell redirection left an empty .txt behind even when llava-cli
    failed, permanently marking the image "already processed").
    """
    # Argument list with shell=False: paths containing spaces or quotes
    # no longer break the command (the original interpolated everything
    # into a shell string with shell=True).
    cmd = [
        LLAVA_EXEC_PATH,
        "-m", MODEL_PATH,
        "--mmproj", MMPROJ_PATH,
        "--temp", str(TEMP),
        "-p", PROMPT,
        "--ctx-size", "0",
        "--image", str(image_path),
    ]
    print(" ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"llava-cli failed ({result.returncode}): {result.stderr.strip()}")
        return result.returncode
    summary_path.write_text(clean_summary_text(result.stdout), encoding="utf-8")
    return 0


def main():
    """Process every image that does not yet have a summary .txt file."""
    # Ensure the output directory exists (the original assumed it did).
    TXT_DIR.mkdir(parents=True, exist_ok=True)
    for image_path in collect_image_paths():
        summary_path = TXT_DIR / (image_path.stem + ".txt")
        if summary_path.exists():
            print(f"Already processed {summary_path}")
            continue
        print(f"Processing {image_path}")
        if summarize_image(image_path, summary_path) == 0:
            print("Done")
            print("txt files cleaned")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment