# MadameMinty/llava.py

## llava.py
#!/usr/bin/env python
# coding: utf
# 2024-02-02
# Tag images with LLaVA and colorsort

# # SETUP Windows
# install Python https://www.python.org/downloads/
# ensure you have WSL and a distro like Ubuntu installed from the Microsoft Store
# run `wsl` in a terminal
# run `curl https://ollama.ai/install.sh | sh` in wsl
# run `ollama serve` in wsl
# open a new terminal without closing the above and run `wsl`
# run `ollama pull llava:7b-v1.6-mistral-q5_K_M` to download the model
# you can also use a different model https://ollama.ai/library/llava/tags
# to fit into your VRAM. I recommend `mistral-q*_K_M` family.
# Smaller is faster, too.
#
# # USE
# `wsl`, `ollama serve`
# in a new terminal or from a shortcut, `python llava.py "E:\Photos"`
# the script will connect to ollama API, and process all images
# in the directory and its subdirectories _and overwrite them_
#
# # SETUP Linux
# you know what to do
#
# RESULT
# You can now search for tags in Windows Explorer etc.
# with "tags:something" in the search bar
# and sort by "Comments" to sort by the dominant color

import base64
import requests
from PIL import Image
import re
from pathlib import Path

# Matches every character that is not allowed in a tag list, i.e. anything
# other than ASCII letters, commas and spaces.
r_tags = re.compile(r'[^a-zA-Z, ]')
# Matches a leading "YYYY-MM-DD HH.MM.SS" timestamp in a file name.
r_datetime = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}\.\d{2}\.\d{2}')

# Model name sent to the ollama API.
MODEL = "llava:7b-v1.6-mistral-q5_K_M"

# dominant color constants
# blend weights: saturation counts once, value V times, hue V*V times
V = 33
H = V ** 2
# technical
# largest possible blend, reached when hue, value and saturation are all 255
MAX_COLOR = 255 * (H + V + 1)
# upper bound of an unsigned 16-bit integer; the blend is scaled to 0..int16u
int16u = 0xFFFF


def dominant_color(image) -> bytes:
    """Return a sortable color key for *image* as UTF-16LE encoded digits.

    The image is converted to HSV and shrunk to a single pixel. That pixel's
    hue, value and saturation are blended into one number (hue weighted by
    ``H``, value by ``V``, saturation by 1), scaled to ``0..int16u`` and
    rendered as a decimal string.
    """
    hsv_image = image.convert('HSV')
    # resample=0 is Pillow's NEAREST filter
    single_pixel = hsv_image.resize((1, 1), resample=0)
    hue, saturation, brightness = single_pixel.getpixel((0, 0))

    blended = H*hue + V*brightness + saturation
    scaled = int((blended/MAX_COLOR) * int16u)
    return str(scaled).encode('utf-16le')


def extract_title(file: Path) -> str:
    """Build the optional `` titled "..."`` prompt suffix from a file name.

    A leading ``YYYY-MM-DD HH.MM.SS`` timestamp is removed from the file stem
    and the remainder is stripped of surrounding whitespace. Returns an empty
    string when nothing is left, so the prompt simply omits the title.
    """
    stem_without_timestamp: str = r_datetime.sub('', file.stem).strip()
    if not stem_without_timestamp:
        return ''
    return f' titled "{stem_without_timestamp}"'


def encode_image_to_base64(file: Path) -> str:
    """Read *file* as raw bytes and return them as a base64 text string."""
    with open(file, "rb") as stream:
        raw: bytes = stream.read()
    encoded: bytes = base64.b64encode(raw)
    return encoded.decode('utf-8')


def _clean_tags(raw: str) -> str:
    """Turn a lower-cased model reply into a ``tag;tag;tag`` string.

    Separators are normalised to commas, every character other than ASCII
    letters, commas and spaces is dropped, and empty or repeated tags are
    removed while keeping the order in which the model produced them.
    """
    if ',' in raw:
        # Hyphenated words become separate words. This has to happen before
        # the character filter, which would otherwise glue them together.
        raw = raw.replace('-', ' ')
    else:
        # The model ignored the requested format (bullet list, one word per
        # line, dash-separated, ...): treat dashes and line breaks as
        # separators. They must become commas, because the character filter
        # below removes everything else, semicolons included.
        raw = raw.replace('\r', '').replace('\n', ',').replace('-', ',')

    # remove illegal characters
    letters_only = r_tags.sub('', raw)

    # semicolon;separated;deduplicated
    stripped = (tag.strip() for tag in letters_only.split(','))
    # dict.fromkeys deduplicates like a set but keeps a stable order
    return ';'.join(dict.fromkeys(tag for tag in stripped if tag))


def ollama(file: Path) -> str:
    """Ask the local ollama server for tags describing the image at *file*.

    The image is sent base64-encoded together with a prompt that includes the
    file's title, if it has one.

    Returns:
        ``'explicit'`` when the reply mentions explicit or sexual content or
        looks like a refusal, ``''`` when the server does not answer with
        HTTP 200, otherwise the semicolon-separated, deduplicated tags.
    """
    image_base64: str = encode_image_to_base64(file)
    title: str = extract_title(file)

    response = requests.post('http://localhost:11434/api/generate', json={
        "model": MODEL,
        "prompt": f'''Generate a comma-separated list of five dictionary words describing this image{title}.''',
        "stream": False,
        "images": [image_base64]
    })
    if response.status_code != 200:
        return ''

    reply: str = response.json().get('response', '').lower()

    # check if explicit or refusal; the reply is lower-cased, so the refusal
    # marker has to be lower-case too, and word boundaries keep it from
    # matching inside phrases such as "as an airplane"
    if 'explicit' in reply \
            or 'sexual' in reply \
            or re.search(r'\bas an ai\b', reply):
        return 'explicit'

    return _clean_tags(reply)


def process_images(directory: str = r'E:\Photos'):
    """Tag every JPEG below *directory* and overwrite it in place.

    For each ``.jpg``/``.jpeg``/``.jfif`` file in the directory and its
    subdirectories, the tags returned by ``ollama`` are stored in the
    XPKeywords EXIF field and the key returned by ``dominant_color`` in the
    XPComment field, then the file is saved over itself.
    """
    extensions = {".jpg", ".jpeg", ".jfif", }
    files = (
        p.absolute()
        for p in Path(directory).glob("**/*")
        # is_file() skips directories that happen to carry an image suffix
        if p.suffix.lower() in extensions and p.is_file())

    for file in files:
        with Image.open(file) as image:
            exif = image.getexif()

            # keep trying until we get a valid tag list
            tags: str = ''
            tries: int = 0
            while (not tags or len(tags) > 64) and tries < 4:
                tags = ollama(file)
                tries += 1

            # https://exiftool.org/TagNames/EXIF.html
            # 0x9c9b	XPTitle		Title
            # 0x9c9c	XPComment	Comments	<- dominant color
            # 0x9c9d	XPAuthor	Authors
            # 0x9c9e	XPKeywords	Tags;like;this	<- tags
            # 0x9c9f	XPSubject	unreadable
            if tags:
                # never replace existing keywords with an empty result
                exif[0x9c9e] = tags.encode('utf-16le')
            exif[0x9c9c] = dominant_color(image)

            save_options = {'exif': exif}
            if image.format == 'JPEG':
                # Reuse the source quantization tables and subsampling, so
                # re-tagging does not recompress the photo at default quality.
                # Pillow only accepts 'keep' when the source is a JPEG.
                save_options['quality'] = 'keep'
            icc_profile = image.info.get('icc_profile')
            if icc_profile:
                # the JPEG writer drops the color profile unless it is passed
                save_options['icc_profile'] = icc_profile
            image.save(file, 'JPEG', **save_options)


if __name__ == "__main__":
    import sys

    # Pass the first command-line argument if there is one; otherwise let
    # process_images use its default directory instead of raising IndexError.
    process_images(*sys.argv[1:2])
	#!/usr/bin/env python
	# coding: utf
	# 2024-02-02
	# Tag images with LLaVA and colorsort

	# # SETUP Windows
	# install Python https://www.python.org/downloads/
	# ensure you have WSL and a distro like Ubuntu installed from the Microsoft Store
	# run `wsl` in a terminal
	# run `curl https://ollama.ai/install.sh | sh` in wsl
	# run `ollama serve` in wsl
	# open a new terminal without closing the above and run `wsl`
	# run `ollama pull llava:7b-v1.6-mistral-q5_K_M` to download the model
	# you can also use a different model https://ollama.ai/library/llava/tags
	# to fit into your VRAM. I recommend `mistral-q*_K_M` family.
	# Smaller is faster, too.
	#
	# # USE
	# `wsl`, `ollama serve`
	# in a new terminal or from a shortcut, `python llava.py "E:\Photos"`
	# the script will connect to ollama API, and process all images
	# in the directory and its subdirectories _and overwrite them_
	#
	# # SETUP Linux
	# you know what to do
	#
	# RESULT
	# You can now search for tags in Windows Explorer etc.
	# with "tags:something" in the search bar
	# and sort by "Comments" to sort by the dominant color

	import base64
	import requests
	from PIL import Image
	import re
	from pathlib import Path

	# Matches every character that is not allowed in a tag list, i.e. anything
	# other than ASCII letters, commas and spaces.
	r_tags = re.compile(r'[^a-zA-Z, ]')
	# Matches a leading "YYYY-MM-DD HH.MM.SS" timestamp in a file name.
	r_datetime = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}\.\d{2}\.\d{2}')

	# Model name sent to the ollama API.
	MODEL = "llava:7b-v1.6-mistral-q5_K_M"

	# dominant color constants
	# blend weights: saturation counts once, value V times, hue V*V times
	V = 33
	H = V*V
	# technical
	# largest possible blend, reached when hue, value and saturation are all
	# 255 (the multiplication signs had been lost, leaving undefined names)
	MAX_COLOR = H*255 + V*255 + 255
	# upper bound of an unsigned 16-bit integer; the blend is scaled to it
	int16u = 65535


	def dominant_color(image) -> bytes:
	    """Return a sortable color key for *image* as UTF-16LE encoded digits.

	    The image is converted to HSV and shrunk to a single pixel. That
	    pixel's hue, value and saturation are blended into one number (hue
	    weighted by ``H``, value by ``V``, saturation by 1), scaled to
	    ``0..int16u`` and rendered as a decimal string.
	    """
	    # resample=0 is Pillow's NEAREST filter
	    h, s, v = image\
	        .convert('HSV')\
	        .resize((1, 1), resample=0)\
	        .getpixel((0, 0))
	    # the multiplication signs had been lost here (``Hh + Vv``)
	    value = H*h + V*v + s
	    value = str(int((value/MAX_COLOR) * int16u))
	    value_bytes: bytes = value.encode('utf-16le')
	    return value_bytes


	def extract_title(file: Path) -> str:
	    """Build the optional `` titled "..."`` prompt suffix from a file name.

	    A leading ``YYYY-MM-DD HH.MM.SS`` timestamp is removed from the file
	    stem and the remainder is stripped of surrounding whitespace. Returns
	    an empty string when nothing is left.
	    """
	    title: str = file.stem
	    title = re.sub(r_datetime, '', title).strip()

	    if title:
	        title = f' titled "{title}"'
	    return title


	def encode_image_to_base64(file: Path) -> str:
	    """Read *file* as raw bytes and return them as a base64 text string."""
	    with open(file, "rb") as image_file:
	        return base64.b64encode(image_file.read()).decode('utf-8')


	def _clean_tags(raw: str) -> str:
	    """Turn a lower-cased model reply into a ``tag;tag;tag`` string.

	    Separators are normalised to commas, every character other than ASCII
	    letters, commas and spaces is dropped, and empty or repeated tags are
	    removed while keeping the order in which the model produced them.
	    """
	    if ',' in raw:
	        # Hyphenated words become separate words. This has to happen
	        # before the character filter, which would glue them together.
	        raw = raw.replace('-', ' ')
	    else:
	        # The model ignored the requested format: treat dashes and line
	        # breaks as separators. They must become commas, because the
	        # character filter below removes everything else, semicolons
	        # included.
	        raw = raw.replace('\r', '').replace('\n', ',').replace('-', ',')

	    # remove illegal characters
	    letters_only = r_tags.sub('', raw)

	    # semicolon;separated;deduplicated
	    stripped = (tag.strip() for tag in letters_only.split(','))
	    # dict.fromkeys deduplicates like a set but keeps a stable order
	    return ';'.join(dict.fromkeys(tag for tag in stripped if tag))


	def ollama(file: Path) -> str:
	    """Ask the local ollama server for tags describing the image at *file*.

	    The image is sent base64-encoded together with a prompt that includes
	    the file's title, if it has one.

	    Returns:
	        ``'explicit'`` when the reply mentions explicit or sexual content
	        or looks like a refusal, ``''`` when the server does not answer
	        with HTTP 200, otherwise the semicolon-separated, deduplicated
	        tags.
	    """
	    image_base64: str = encode_image_to_base64(file)
	    title: str = extract_title(file)

	    response = requests.post('http://localhost:11434/api/generate', json={
	        "model": MODEL,
	        "prompt": f'''Generate a comma-separated list of five dictionary words describing this image{title}.''',
	        "stream": False,
	        "images": [image_base64]
	    })
	    if response.status_code != 200:
	        return ''

	    reply: str = response.json().get('response', '').lower()

	    # check if explicit or refusal; the reply is lower-cased, so the
	    # refusal marker has to be lower-case too, and word boundaries keep it
	    # from matching inside phrases such as "as an airplane"
	    if 'explicit' in reply \
	            or 'sexual' in reply \
	            or re.search(r'\bas an ai\b', reply):
	        return 'explicit'

	    return _clean_tags(reply)


	def process_images(directory: str = r'E:\Photos'):
	    """Tag every JPEG below *directory* and overwrite it in place.

	    For each ``.jpg``/``.jpeg``/``.jfif`` file in the directory and its
	    subdirectories, the tags returned by ``ollama`` are stored in the
	    XPKeywords EXIF field and the key returned by ``dominant_color`` in
	    the XPComment field, then the file is saved over itself.
	    """
	    extensions = {".jpg", ".jpeg", ".jfif", }
	    files = (
	        p.absolute()
	        # recursive pattern; its asterisks had been lost, leaving "*/"
	        for p in Path(directory).glob("**/*")
	        # is_file() skips directories that happen to carry an image suffix
	        if p.suffix.lower() in extensions and p.is_file())

	    for file in files:
	        with Image.open(file) as image:
	            exif = image.getexif()

	            # keep trying until we get a valid tag list
	            tags: str = ''
	            tries: int = 0
	            while (not tags or len(tags) > 64) and tries < 4:
	                tags = ollama(file)
	                tries += 1

	            # https://exiftool.org/TagNames/EXIF.html
	            # 0x9c9b XPTitle Title
	            # 0x9c9c XPComment Comments <- dominant color
	            # 0x9c9d XPAuthor Authors
	            # 0x9c9e XPKeywords Tags;like;this <- tags
	            # 0x9c9f XPSubject unreadable
	            if tags:
	                # never replace existing keywords with an empty result
	                exif[0x9c9e] = tags.encode('utf-16le')
	            exif[0x9c9c] = dominant_color(image)

	            save_options = {'exif': exif}
	            if image.format == 'JPEG':
	                # Reuse the source quantization tables and subsampling, so
	                # re-tagging does not recompress the photo. Pillow only
	                # accepts 'keep' when the source is a JPEG.
	                save_options['quality'] = 'keep'
	            icc_profile = image.info.get('icc_profile')
	            if icc_profile:
	                # the JPEG writer drops the profile unless it is passed
	                save_options['icc_profile'] = icc_profile
	            image.save(file, 'JPEG', **save_options)


	if __name__ == "__main__":
	    import sys

	    # Pass the first command-line argument if there is one; otherwise let
	    # process_images use its default directory instead of raising
	    # IndexError.
	    process_images(*sys.argv[1:2])