Skip to content

Instantly share code, notes, and snippets.

@MadameMinty
Last active February 6, 2024 10:10
Show Gist options
  • Save MadameMinty/aff416c44905c38abbe2c80436893ac6 to your computer and use it in GitHub Desktop.
Save MadameMinty/aff416c44905c38abbe2c80436893ac6 to your computer and use it in GitHub Desktop.
Tag images with LLaVA and colorsort
#!/usr/bin/env python
# coding: utf
# 2024-02-02
# Tag images with LLaVA and colorsort
# # SETUP Windows
# install Python https://www.python.org/downloads/
# ensure you have WSL and a distro like Ubuntu installed from the Microsoft Store
# run `wsl` in a terminal
# run `curl https://ollama.ai/install.sh | sh` in wsl
# run `ollama serve` in wsl
# open a new terminal without closing the above and run `wsl`
# run `ollama pull llava:7b-v1.6-mistral-q5_K_M` to download the model
# you can also use a different model https://ollama.ai/library/llava/tags
# to fit into your VRAM. I recommend `mistral-q*_K_M` family.
# Smaller is faster, too.
#
# # USE
# `wsl`, `ollama serve`
# in a new terminal or from a shortcut, `python llava.py "E:\Photos"`
# the script will connect to the ollama API and process all JPEG images
# (.jpg, .jpeg, .jfif) in the directory and its subdirectories _and overwrite them_
#
# # SETUP Linux
# you know what to do
#
# RESULT
# You can now search for tags in Windows Explorer etc.
# with "tags:something" in the search bar
# and sort by "Comments" to sort by the dominant color
import base64
import requests
from PIL import Image
import re
from pathlib import Path
# model queried through the ollama API
MODEL = "llava:7b-v1.6-mistral-q5_K_M"

# characters stripped from the model's reply: anything except letters, commas, spaces
r_tags = re.compile(r'[^a-zA-Z, ]')
# leading "YYYY-MM-DD HH.MM.SS" timestamp at the start of a file name
r_datetime = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}\.\d{2}\.\d{2}')

# dominant color: blend weights for the sort key
# (H is applied to hue, V to value; saturation is unweighted)
V = 33
H = V ** 2
# largest blended key, reached when all three 8-bit channels are 255
MAX_COLOR = 255 * (H + V + 1)
# upper bound the blended key is rescaled to (unsigned 16-bit maximum)
int16u = 0xFFFF
def dominant_color(image) -> bytes:
    """Return a color sort key for *image* as UTF-16LE encoded decimal text.

    The image is converted to HSV and shrunk to a single pixel. That pixel's
    channels are blended into one number (hue weighted by ``H``, value by
    ``V``, saturation unweighted), rescaled to the range ``0..int16u`` and
    rendered as a decimal string.
    """
    hsv_image = image.convert('HSV')
    # resample=0 selects Pillow's nearest-neighbour filter
    single_pixel = hsv_image.resize((1, 1), resample=0)
    hue, saturation, brightness = single_pixel.getpixel((0, 0))

    blended = H*hue + V*brightness + saturation
    scaled = int((blended/MAX_COLOR) * int16u)
    return str(scaled).encode('utf-16le')
def extract_title(file: Path) -> str:
    """Build the optional ``' titled "..."'`` prompt suffix from a file name.

    A leading timestamp matched by ``r_datetime`` is dropped from the file's
    stem and surrounding whitespace is trimmed. If nothing is left, an empty
    string is returned so the prompt is unchanged.
    """
    stem = r_datetime.sub('', file.stem).strip()
    return f' titled "{stem}"' if stem else stem
def encode_image_to_base64(file: Path) -> str:
    """Read *file* as raw bytes and return them as a base64 text string."""
    with open(file, mode="rb") as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw)
    return str(encoded, 'utf-8')
def _clean_tags(raw: str) -> str:
    """Normalize a raw model reply into a ``;``-separated tag list.

    Returns ``'explicit'`` when the reply mentions explicit/sexual content or
    looks like a refusal. Otherwise returns the tags lowercased, stripped of
    everything but letters and spaces, de-duplicated in first-seen order,
    with empty entries dropped.
    """
    text = raw.lower()
    # the reply is lowercased above, so every marker must be lowercase too
    if any(marker in text for marker in ('explicit', 'sexual', 'as an ai')):
        return 'explicit'
    if ',' not in text:
        # not comma-separated: treat list bullets and line breaks as the
        # separators. They must become commas (not semicolons), because the
        # character filter below removes everything except letters, commas
        # and spaces.
        text = text.replace('\r', '').replace('-', ',').replace('\n', ',')
    # turn remaining hyphens into spaces *before* filtering, so "well-lit"
    # becomes "well lit" rather than "welllit"
    text = r_tags.sub('', text.replace('-', ' '))
    # collapse inner whitespace; dict.fromkeys de-duplicates while keeping
    # the order stable between runs
    unique = dict.fromkeys(' '.join(part.split()) for part in text.split(','))
    return ';'.join(tag for tag in unique if tag)


def ollama(file: Path) -> str:
    """Ask the local ollama server for tags describing the image in *file*.

    The image is sent base64-encoded to ``/api/generate`` together with a
    prompt that includes the file's title, if it has one.

    Returns:
        ``;``-separated tags, ``'explicit'`` for explicit content or a
        refusal, or ``''`` when the server does not answer with HTTP 200.
    """
    image_base64: str = encode_image_to_base64(file)
    title: str = extract_title(file)
    response = requests.post('http://localhost:11434/api/generate', json={
        "model": MODEL,
        "prompt": f'''Generate a comma-separated list of five dictionary words describing this image{title}.''',
        "stream": False,
        "images": [image_base64]
    })
    if response.status_code != 200:
        return ''
    data: dict = response.json()
    return _clean_tags(data.get('response', ''))
def process_images(directory: str = r'E:\Photos'):
    """Tag every JPEG under *directory* (recursively), overwriting each file.

    For each ``.jpg``/``.jpeg``/``.jfif`` file, the tags returned by
    ``ollama`` are stored in the XPKeywords EXIF field and the key returned
    by ``dominant_color`` in the XPComment field, then the file is saved
    back over itself.

    Args:
        directory: root folder to scan.
    """
    extensions = {".jpg", ".jpeg", ".jfif", }
    files = (
        p.absolute()
        for p in Path(directory).glob("**/*")
        if p.suffix.lower() in extensions)
    for file in files:
        with Image.open(file) as image:
            exif = image.getexif()
            # keep trying until we get a valid tag list
            # (non-empty and at most 64 characters), up to 4 attempts
            tags: str = ''
            tries: int = 0
            while (not tags or len(tags) > 64) and tries < 4:
                tags = ollama(file)
                tries += 1
            # https://exiftool.org/TagNames/EXIF.html
            # 0x9c9b XPTitle Title
            # 0x9c9c XPComment Comments <- dominant color
            # 0x9c9d XPAuthor Authors
            # 0x9c9e XPKeywords Tags;like;this <- tags
            # 0x9c9f XPSubject unreadable
            if tags:
                # only write keywords when the model produced some, so a
                # failed run does not store an empty tag list
                exif[0x9c9e] = tags.encode('utf-16le')
            exif[0x9c9c] = dominant_color(image)
            save_options = {'exif': exif}
            if image.format == 'JPEG':
                # reuse the source's quantization tables and subsampling
                # instead of re-encoding at Pillow's default quality;
                # 'keep' is only accepted when the source is a JPEG
                save_options['quality'] = 'keep'
            image.save(file, 'JPEG', **save_options)
if __name__ == "__main__":
    import sys
    # pass the first command-line argument as the directory; with no
    # argument, fall back to process_images' default directory instead of
    # failing with IndexError
    process_images(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment