Skip to content

Instantly share code, notes, and snippets.

@juananpe
Created July 14, 2023 11:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save juananpe/98130f0b8f67edbd03f19f498dc10891 to your computer and use it in GitHub Desktop.
Save juananpe/98130f0b8f67edbd03f19f498dc10891 to your computer and use it in GitHub Desktop.
describe.py
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
from PIL import Image
import requests
import warnings
warnings.filterwarnings("ignore")
def load_model(model_name='google/pix2struct-textcaps-base', device=None):
    """Load a Pix2Struct model and its matching processor.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained`` for
            both the model and the processor.
        device: Device the model is moved to. When ``None``, the first
            available of ``"mps"``, ``"cuda"`` and ``"cpu"`` is used, so the
            script no longer crashes on machines without Apple's Metal
            backend.

    Returns:
        A ``(model, processor)`` tuple.
    """
    if device is None:
        # Local import: torch is only needed to probe the available backends.
        import torch

        # ``torch.backends.mps`` is missing on older torch builds, so probe it
        # defensively instead of assuming the attribute exists.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            device = "mps"
        elif torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

    model = Pix2StructForConditionalGeneration.from_pretrained(model_name).to(device)
    processor = Pix2StructProcessor.from_pretrained(model_name)
    return model, processor
def process_data(image, model, processor, max_length=60):
    """Generate a text description for ``image``.

    Args:
        image: Image handed to ``processor`` via its ``images`` argument.
        model: Model exposing ``device`` and ``generate``.
        processor: Callable that builds the model inputs and exposes
            ``decode``.
        max_length: ``max_length`` value forwarded to ``model.generate``.

    Returns:
        The decoded first generated sequence, with special tokens skipped.
    """
    # Follow the model's own device instead of hard-coding "mps", so the
    # inputs always end up on the same device as the weights.
    inputs = processor(images=image, return_tensors='pt').to(model.device)
    generated_ids = model.generate(**inputs, max_length=max_length)
    return processor.decode(generated_ids[0], skip_special_tokens=True)
image_path = './logo.png'

# Open the image first so a missing file fails before the expensive model
# load; the context manager guarantees the file handle is closed afterwards.
with Image.open(image_path) as image:
    # Load the model and processor
    model, processor = load_model()
    # Process the image and generate text while the file is still open
    response = process_data(image, model, processor)

print(response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment