Skip to content

Instantly share code, notes, and snippets.

@rain-1
Last active October 11, 2024 13:24
Show Gist options
  • Save rain-1/cafbd3b04876cee936802d4d3a9a490d to your computer and use it in GitHub Desktop.
Save rain-1/cafbd3b04876cee936802d4d3a9a490d to your computer and use it in GitHub Desktop.
organize images using AI

Organize Images into folders using AI

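This is a tool that sorts images into folders using "AI". You create the folders you want the images to be put into; each folder's name is used as a category label, and every image is copied into the folder whose name best matches its generated caption.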

I got Claude to write it with a couple of prompts. Create a virtual environment (venv) and install the dependencies with pip.

Here is an imgur album containing the data set I created, and the result of running it with the named folders: https://imgur.com/a/yET80b9

You can see that it sort of works. It gets some things wrong, e.g. xkcd comics.

See also

# written by claude 3.5 sonnet
import os
import json
import hashlib
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import shutil
# Run the captioning model on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Debug aid (left disabled): print(f"device = {device}")
def get_embedding(text, model):
    """Return the embedding of a single piece of text.

    Args:
        text: The string to embed.
        model: An encoder exposing ``encode(list_of_texts)``; this file
            passes a ``SentenceTransformer`` instance.

    Returns:
        The first (and only) element of ``model.encode([text])``.
    """
    # The encoder is handed a one-element batch, so unwrap its single result.
    batch = model.encode([text])
    return batch[0]
def classify_text(input_text, categories, model):
    """Pick the category whose embedding is most similar to the input text.

    Args:
        input_text: Text to classify (in this file, an image caption).
        categories: Non-empty collection of category names.
        model: An encoder exposing ``encode(list_of_texts)``; this file
            passes a ``SentenceTransformer`` instance.

    Returns:
        tuple: ``(best_category, similarity)`` where ``similarity`` is the
        cosine similarity between the input text and the chosen category.

    Raises:
        ValueError: If ``categories`` is empty.
    """
    categories = list(categories)
    if not categories:
        # Fail early with a clear message instead of an obscure shape error
        # from cosine_similarity / argmax further down.
        raise ValueError("categories must contain at least one label")

    # Encode the input and all categories in one batched call rather than
    # invoking the encoder once per category.
    embeddings = model.encode([input_text, *categories])
    # Row 0 is the input text; the remaining rows line up with `categories`.
    similarities = cosine_similarity(embeddings[:1], embeddings[1:])[0]
    best_index = int(np.argmax(similarities))
    return categories[best_index], similarities[best_index]
# Load the pre-trained sentence-embedding model once, when the module is run
# or imported; organize_images passes it to classify_text to compare image
# captions with category names.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
def compute_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

    The file is read in 4096-byte chunks so it is never held in memory
    all at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                # An empty read marks end of file.
                break
            digest.update(chunk)
    return digest.hexdigest()
def load_cache(cache_file):
    """Load the description cache from a JSON file.

    Args:
        cache_file: Path of the JSON cache file.

    Returns:
        dict: The cached mapping, or an empty dict when the file does not
        exist, cannot be parsed, or does not contain a JSON object.
    """
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            cache = json.load(f)
    except FileNotFoundError:
        # No cache yet (e.g. first run): start empty.
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # A truncated or corrupt cache should not abort the run; its entries
        # can simply be regenerated.
        print(f"Warning: ignoring unreadable cache file '{cache_file}': {exc}")
        return {}

    if not isinstance(cache, dict):
        # Callers index the cache by key, so anything but a JSON object is unusable.
        print(f"Warning: ignoring cache file '{cache_file}': expected a JSON object.")
        return {}
    return cache
def save_cache(cache_file, cache):
    """Write the description cache to ``cache_file`` as indented JSON.

    The data is first written to a sibling ``<cache_file>.tmp`` file and then
    moved over the target with ``os.replace``, so an interruption or a
    serialization error never leaves a half-written cache behind.

    Args:
        cache_file: Path of the JSON cache file to (over)write.
        cache: JSON-serializable mapping to store.

    Raises:
        TypeError: If ``cache`` contains values ``json.dump`` cannot serialize.
        OSError: If the file cannot be written or replaced.
    """
    tmp_file = f"{cache_file}.tmp"
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2)
        os.replace(tmp_file, cache_file)
    except Exception:
        # Do not leave a stray temp file behind; the previous cache is untouched.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
        raise
def list_folders(directory):
    """
    Return the names of the sub-folders directly inside a directory.

    Args:
        directory (str): Path of the directory to inspect.

    Returns:
        list: Names (not full paths) of the entries that are directories.
            If the directory cannot be listed, a message is printed and an
            empty list is returned.
    """
    try:
        subfolders = []
        for name in os.listdir(directory):
            # Keep only entries that are themselves directories.
            if os.path.isdir(os.path.join(directory, name)):
                subfolders.append(name)
        return subfolders
    except FileNotFoundError:
        print(f"Error: The directory '{directory}' was not found.")
    except PermissionError:
        print(f"Error: Permission denied to access the directory '{directory}'.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    # Every failure path falls through to an empty result.
    return []
def _load_rgb_image(image_path):
    """Open an image file and return it as a fully loaded RGB image."""
    # The context manager closes the file handle; convert() forces the pixel
    # data to be read and normalizes palette/alpha/grayscale images to RGB.
    with Image.open(image_path) as img:
        return img.convert("RGB")


def _caption_image(image, processor, model):
    """Generate a text description for an image with the captioning model."""
    inputs = processor(images=image, return_tensors="pt").to(device)
    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        generated_ids = model.generate(
            pixel_values=inputs["pixel_values"],
            max_length=50,
            num_beams=4,
            num_return_sequences=1,
        )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def organize_images(categories, folder_path, cache_file='image_descriptions_cache.json'):
    """Caption every image in a folder and copy it into the best-matching category folder.

    For each image file directly inside ``folder_path`` a description is
    generated with the ``microsoft/git-large-coco`` model (or taken from the
    cache, keyed by the file's SHA-256), the description is matched against
    ``categories`` with ``classify_text``, and the image is copied to
    ``folder_path/<category>/<image name>``. The original file is left in place.

    Args:
        categories: Category names; each is also the name of the destination
            sub-folder inside ``folder_path``.
        folder_path: Directory containing the images to organize.
        cache_file: Path of the JSON file used to cache image descriptions.

    Returns:
        None. Progress is reported with ``print``.
    """
    if not categories:
        # Without labels there is nothing to classify into; bail out before
        # paying for the model load.
        print("Error: no categories given; create at least one sub-folder to sort images into.")
        return

    # Load the captioning model and its processor.
    model_name = "microsoft/git-large-coco"
    processor = AutoProcessor.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

    cache = load_cache(cache_file)

    # Collect the image files (by extension) directly inside the folder.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    image_files = [
        f for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(folder_path, f))
    ]

    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        file_hash = compute_sha256(image_path)

        if file_hash in cache:
            description = cache[file_hash]
            print(f"Image: {image_file} (cached)")
        else:
            try:
                image = _load_rgb_image(image_path)
            except OSError as exc:
                # Pillow reports unreadable or corrupt images as OSError
                # subclasses; skip the file instead of aborting the whole run.
                print(f"Skipping {image_file}: could not read image ({exc})")
                print()
                continue
            description = _caption_image(image, processor, model)
            # Persist right away so an interrupted run keeps finished captions.
            cache[file_hash] = description
            save_cache(cache_file, cache)
            print(f"Image: {image_file} (new)")

        print(f"Description: {description}")
        classified_category, similarity_score = classify_text(description, categories, sentence_model)
        print(f"Classified as: {classified_category}")
        print(f"Similarity score: {similarity_score:.4f}")
        print()

        # Copy (not move) the image into its category folder, creating the
        # folder if the caller supplied a category that does not exist yet.
        destination_dir = os.path.join(folder_path, classified_category)
        os.makedirs(destination_dir, exist_ok=True)
        shutil.copyfile(image_path, os.path.join(destination_dir, image_file))

    # Save updated cache
    save_cache(cache_file, cache)
if __name__ == "__main__":
    import sys

    # The folder to organize may be given as the first command-line argument;
    # without one, the placeholder path below is used (edit it to your folder).
    folder_path = sys.argv[1] if len(sys.argv) > 1 else "/path/to/your/pictures"
    # The names of the existing sub-folders serve as the category labels.
    labels = list_folders(folder_path)
    organize_images(labels, folder_path)
@EtheriVR
Copy link

Really useful stuff, thanks!

@saeedesmaili
Copy link

You could probably use the openai-clip model to embed both images and folder names.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment