Skip to content

Instantly share code, notes, and snippets.

@e96031413
Created July 11, 2024 09:06
Show Gist options
  • Save e96031413/a3b981a362dd8dd5ce444205a1ac717e to your computer and use it in GitHub Desktop.
Save e96031413/a3b981a362dd8dd5ce444205a1ac717e to your computer and use it in GitHub Desktop.
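# Setup (run once from a shell). The script must live in, and be run from,
# the BLIP repository root so that `from models.blip import ...` resolves:
#   git clone https://github.com/salesforce/BLIP
#   cd BLIP
#   touch BLIP_img2caption.py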
import os
import json
from PIL import Image
import torch
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
from models.blip import blip_decoder
from tqdm import tqdm
# --- Runtime configuration --------------------------------------------------
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Placeholder: point this at the directory holding the images to caption.
image_folder = "path/to/your/img_folder/"
# Edge length images are resized to; also handed to the model builder below.
image_size = 384

# --- Model ------------------------------------------------------------------
# Checkpoint location passed to blip_decoder as its `pretrained` argument.
model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_capfilt_large.pth'
model = blip_decoder(
    pretrained=model_url,
    image_size=image_size,
    vit='base',
).to(device)
# Put the model in evaluation mode before generating captions.
model.eval()
def load_image(image_path, image_size, device):
    """Load one image file and turn it into a normalized model input.

    Args:
        image_path: Path of the image file to open with PIL.
        image_size: Target edge length; the image is resized to
            ``(image_size, image_size)``, so aspect ratio is not preserved.
        device: Torch device the resulting tensor is moved to.

    Returns:
        The transformed image tensor with an extra leading dimension of
        size 1 (added via ``unsqueeze(0)``), placed on ``device``.

    Raises:
        OSError: If the file cannot be opened or decoded by PIL.
    """
    preprocess = transforms.Compose([
        transforms.Resize(
            (image_size, image_size),
            interpolation=InterpolationMode.BICUBIC,
        ),
        transforms.ToTensor(),
        # Per-channel (mean, std) used to normalize the tensor.
        transforms.Normalize(
            (0.48145466, 0.4578275, 0.40821073),
            (0.26862954, 0.26130258, 0.27577711),
        ),
    ])
    # Image.open is lazy and keeps the underlying file open. The context
    # manager releases the handle deterministically, even if decoding fails;
    # convert() returns an independent in-memory copy that outlives it.
    with Image.open(image_path) as source:
        raw_image = source.convert('RGB')
    image = preprocess(raw_image).unsqueeze(0).to(device)
    return image
def generate_captions(image_folder, image_size, model, device):
    """Caption every readable image file found directly inside a folder.

    Entries are processed in sorted name order so the output is reproducible
    (``os.listdir`` returns names in arbitrary order). Sub-directories are
    skipped, and files that cannot be opened as images are reported and
    skipped rather than aborting the run and losing all earlier captions.

    Args:
        image_folder: Directory whose entries are captioned (not recursive).
        image_size: Edge length passed through to ``load_image``.
        model: Captioning model exposing ``generate(image, ...)``; the first
            element of its return value is stored as the caption.
        device: Torch device the image tensors are placed on.

    Returns:
        Dict mapping each captioned file name to its caption.
    """
    captions = {}
    # Inference only: disable gradient tracking once for the whole loop.
    with torch.no_grad():
        for image_name in tqdm(sorted(os.listdir(image_folder))):
            image_path = os.path.join(image_folder, image_name)
            if not os.path.isfile(image_path):
                continue
            try:
                image = load_image(image_path, image_size, device)
            except OSError as err:
                # PIL signals unreadable / non-image files with OSError
                # subclasses; tqdm.write keeps the progress bar intact.
                tqdm.write(f"Skipping {image_name}: {err}")
                continue
            caption = model.generate(
                image,
                sample=False,
                num_beams=3,
                max_length=20,
                min_length=5,
            )
            captions[image_name] = caption[0]
    return captions
# Caption everything in the configured folder, then store the
# {file name: caption} mapping as indented JSON in the working directory.
captions = generate_captions(image_folder, image_size, model, device)
with open('captions.json', 'w') as f:
    f.write(json.dumps(captions, indent=4))
print('Captions generated and saved to captions.json')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment