@pszemraj
Last active February 2, 2024 16:18
Setting up nomic-embed-text-v1 with sentence-transformers (sbert) and ONNX Runtime
# pip install sentence-transformers
from sentence_transformers import SentenceTransformer, util, models

model_name = "nomic-ai/nomic-embed-text-v1"
pooling_mode = "mean"

word_embedding_model = models.Transformer(
    model_name,
    max_seq_length=8192,
    model_args={"trust_remote_code": True, "rotary_scaling_factor": 2},
    tokenizer_args={"trust_remote_code": True},
)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode=pooling_mode,
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

embeddings = model.encode(["here is some text", "oh look there is more"], batch_size=16)
print(embeddings.shape)
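
Optional sanity check (not part of the original gist): util is already imported above, so the two embeddings returned by model.encode can be compared with cosine similarity.

# compare the two example sentences from above with cosine similarity
similarity = util.cos_sim(embeddings[0], embeddings[1])
print(similarity.item())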
# pip install -U "optimum[onnxruntime]"
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction
def mean_pooling(model_output, attention_mask):
token_embeddings = model_output[0]
input_mask_expanded = (
attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
)
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
input_mask_expanded.sum(1), min=1e-9
)
sentences = ["What is TSNE?", "Who is Laurens van der Maaten?"]
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", model_max_length=8192)
model = ORTModelForFeatureExtraction.from_pretrained(
"nomic-ai/nomic-embed-text-v1",
trust_remote_code=True,
file_name="onnx/model_quantized.onnx", # model.onnx for unquantized version
rotary_scaling_factor=2,
)
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
model_output = model(**encoded_input)
embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
embeddings = F.normalize(embeddings, p=2, dim=1)
print(embeddings)
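
Because the ONNX embeddings above are L2-normalized, cosine similarity reduces to a dot product; a minimal check using only the tensors defined above:

# 2x2 cosine-similarity matrix for the two example sentences
scores = embeddings @ embeddings.T
print(scores)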
# pip install -U "optimum[onnxruntime]"
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction
from tqdm.auto import trange
class TextEmbedder:
def __init__(
self,
model_name="nomic-ai/nomic-embed-text-v1",
tokenizer_name="bert-base-uncased",
model_file_name="onnx/model_quantized.onnx",
batch_size=8,
):
self.model_name = model_name
self.tokenizer_name = tokenizer_name
self.batch_size = batch_size
self.tokenizer = AutoTokenizer.from_pretrained(
self.tokenizer_name or self.model_name, model_max_length=8192
)
self.model = ORTModelForFeatureExtraction.from_pretrained(
model_name,
trust_remote_code=True,
file_name=model_file_name, # Use "onnx/model.onnx" for unquantized version
rotary_scaling_factor=2,
)
@staticmethod
def mean_pooling(model_output, attention_mask):
token_embeddings = model_output[0]
input_mask_expanded = (
attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
)
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
input_mask_expanded.sum(1), min=1e-9
)
def encode_batch(self, sentences_batch):
encoded_input = self.tokenizer(
sentences_batch, padding=True, truncation=True, return_tensors="pt"
)
with torch.no_grad():
model_output = self.model(**encoded_input)
embeddings = self.mean_pooling(model_output, encoded_input["attention_mask"])
return F.normalize(embeddings, p=2, dim=1)
def encode(
self,
sentences,
return_list=False,
disable_progress=False,
):
# Ensure sentences is a list
if isinstance(sentences, str):
sentences = [sentences] # Wrap a single sentence into a list
all_embeddings = []
for i in trange(0, len(sentences), self.batch_size, disable=disable_progress):
batch_sentences = sentences[i : i + self.batch_size]
batch_embeddings = self.encode_batch(batch_sentences)
all_embeddings.append(batch_embeddings)
all_embeddings = torch.cat(all_embeddings, dim=0)
return all_embeddings.cpu().tolist() if return_list else all_embeddings.cpu()
def __str__(self):
return (
f"TextEmbedder(model_name={self.model_name}, batch_size={self.batch_size})"
)
def __repr__(self):
return f"TextEmbedder(model_name='{self.model_name}', batch_size={self.batch_size})"
def __call__(self, sentences, **kwargs):
return self.encode(sentences, **kwargs)
# Example usage
if __name__ == "__main__":
sentences = [
"What is TSNE?",
"Who is Laurens van der Maaten?",
"Short sentence.",
"Another example.",
"Yet another query.",
"What is machine learning?",
"Tell me about OpenAI.",
"Explain deep learning.",
]
embedder = TextEmbedder(batch_size=3)
embeddings = embedder.encode(sentences)
print(embeddings)
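    # Hypothetical extension (not in the original gist): rank the example
    # sentences against an illustrative query. Since encode() returns
    # L2-normalized embeddings, cosine similarity is just a matrix product.
    query_embedding = embedder.encode("What is dimensionality reduction?", disable_progress=True)
    scores = embeddings @ query_embedding.T  # shape (8, 1)
    top = scores.squeeze(-1).argsort(descending=True)[:3]
    for idx in top.tolist():
        print(f"{scores[idx].item():.3f}  {sentences[idx]}")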