Skip to content

Instantly share code, notes, and snippets.

Last active September 21, 2024 12:33
Show Gist options
  • Save simonw/4841eb3dee88327136f7ee94a8f5e78a to your computer and use it in GitHub Desktop.
Save simonw/4841eb3dee88327136f7ee94a8f5e78a to your computer and use it in GitHub Desktop.
import base64
import httpx
import llm
from PIL import Image
import io
@llm.hookimpl
def register_embedding_models(register):
    """Plugin hook: register the Jina CLIP embedding model with LLM."""
    register(JinaClipEmbeddingModel())


class JinaClipEmbeddingModel(llm.EmbeddingModel):
    """Embedding model backed by the hosted Jina ``jina-clip-v1`` API.

    Accepts both text (``str``) and binary image content (``bytes``).
    Images are downscaled and re-encoded as JPEG before being sent.
    """

    model_id = "jina-clip-v1-api"
    needs_key = "jina"
    key_env_var = "JINA_API_KEY"
    supports_binary = True
    supports_text = True

    def embed_batch(self, items):
        """Embed a batch of items with a single API request.

        Args:
            items: Iterable of ``str`` (text) or ``bytes`` (encoded image).

        Returns:
            A list with one embedding (list of floats) per item, in the
            order returned by the API.

        Raises:
            TypeError: If an item is neither ``str`` nor ``bytes``.
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        inputs = []
        for item in items:
            if isinstance(item, bytes):
                # Open the image from the raw bytes.
                image = Image.open(io.BytesIO(item))
                # Shrink in place so neither side exceeds 896 pixels;
                # thumbnail() preserves the aspect ratio.
                image.thumbnail((896, 896))
                # JPEG cannot store alpha or palette modes, so normalise
                # to RGB first (otherwise saving e.g. an RGBA PNG fails).
                if image.mode != "RGB":
                    image = image.convert("RGB")
                # Re-encode the resized image as JPEG bytes.
                resized_image_bytes = io.BytesIO()
                image.save(resized_image_bytes, format='JPEG')
                # Base64 text is what goes into the JSON payload.
                encoded = base64.b64encode(
                    resized_image_bytes.getvalue()
                ).decode("utf-8")
                inputs.append({"image": encoded})
            elif isinstance(item, str):
                inputs.append({"text": item})
            else:
                # Silently skipping would misalign results with inputs.
                raise TypeError(
                    "Expected str or bytes, got {}".format(type(item).__name__)
                )

        # NOTE(review): the endpoint URL was missing from the garbled source;
        # this is Jina's public embeddings endpoint - confirm before use.
        response = httpx.post(
            "https://api.jina.ai/v1/embeddings",
            headers={
                "Content-Type": "application/json",
                "Authorization": "Bearer {}".format(self.get_key()),
            },
            json={
                "model": "jina-clip-v1",
                "normalized": False,
                "embedding_type": "float",
                "input": inputs,
            },
        )
        # Surface HTTP errors here rather than as a KeyError on "data" below.
        response.raise_for_status()
        embeddings = response.json()["data"]
        return [embedding["embedding"] for embedding in embeddings]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment