@ovshake
Created March 21, 2023 17:43
Generating captions with CLIP Interrogator
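A setup note: the sys.path tweaks below assume local checkouts of BLIP (under src/blip) and clip-interrogator sit next to this script; if clip-interrogator is installed from PyPI instead (pip install clip-interrogator), those two path appends are unnecessary.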
import sys
sys.path.append('src/blip')
sys.path.append('clip-interrogator')

import torch
from PIL import Image
from tqdm import tqdm
from clip_interrogator import Config, Interrogator

# Configure the interrogator: run on GPU when available and only
# offload the BLIP model to CPU when no GPU is present.
config = Config()
config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
config.blip_offload = not torch.cuda.is_available()
config.chunk_size = 2048
config.flavor_intermediate_count = 512
config.blip_num_beams = 64
ci = Interrogator(config)
def inference(image_path, best_max_flavors):
    # Load the image and caption it with CLIP Interrogator's "best" mode.
    image = Image.open(image_path).convert('RGB')
    prompt_result = ci.interrogate(image, max_flavors=int(best_max_flavors))
    print("mode best: " + prompt_result)
    return prompt_result
def run_inference_on_images(filepaths, best_max_flavors):
    # Caption each image and write one result per line to results.txt.
    with open('results.txt', 'w') as file:
        for filepath in tqdm(filepaths):
            file.write(inference(filepath, best_max_flavors) + '\n')
# Read image filepaths (one per line) from a txt file.
with open('data.txt', 'r') as f:
    filepaths = f.read().splitlines()
run_inference_on_images(filepaths, 5)
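As written, results.txt contains only captions, so the pairing with source images is implicit in line order. A minimal variation (hypothetical helper name and output file, same data.txt layout) that keeps each filepath next to its caption:

# Hypothetical variation: write tab-separated "path<TAB>caption" lines
# so each caption stays explicitly associated with its source image.
def run_inference_with_paths(filepaths, best_max_flavors):
    with open('results.tsv', 'w') as file:
        for filepath in tqdm(filepaths):
            caption = inference(filepath, best_max_flavors)
            file.write(f"{filepath}\t{caption}\n")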