@Emekaborisama
Last active August 30, 2022 12:41
ONNX inference on CPU with optimization
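The snippet below assumes a transformer encoder has already been exported to torch-model.onnx with an output named logits. As a minimal sketch of how such a file could be produced (the checkpoint name, export arguments, and opset are assumptions, not part of the original gist):

import torch
from transformers import AutoModel, AutoTokenizer

checkpoint = "sentence-transformers/all-MiniLM-L6-v2"  # assumed checkpoint
model = AutoModel.from_pretrained(checkpoint).eval()
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Trace with a dummy batch and mark batch/sequence dims as dynamic.
dummy = tokenizer(["example"], return_tensors="pt")
torch.onnx.export(
    model,
    (dummy["input_ids"], dummy["attention_mask"], dummy["token_type_ids"]),
    "torch-model.onnx",
    input_names=["input_ids", "attention_mask", "token_type_ids"],
    output_names=["logits"],
    dynamic_axes={name: {0: "batch", 1: "sequence"}
                  for name in ["input_ids", "attention_mask", "token_type_ids", "logits"]},
    opset_version=14,
)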
import time

import torch
import torch.nn.functional as F
import onnxruntime
from transformers import AutoTokenizer
from sentence_transformers import util

# The tokenizer must match the checkpoint the ONNX model was exported from
# (the model name here is an assumption).
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

ort_session = onnxruntime.InferenceSession("torch-model.onnx", providers=["CPUExecutionProvider"])
def to_numpy(tensor):
    # Detach from the autograd graph before converting to numpy.
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# Standard sentence-transformers mean pooling; the original gist calls
# mean_pooling() without defining it.
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)
def run_inference(inputs):
    tokens = tokenizer(inputs, padding=True, truncation=True, return_tensors="pt")
    attention_mask = tokens["attention_mask"]
    # ONNX Runtime expects numpy arrays, not torch tensors or lists of them.
    ort_inputs = {name: to_numpy(tensor) for name, tensor in tokens.items()}
    ort_outs = ort_session.run(["logits"], ort_inputs)
    return ort_outs, attention_mask
# Two placeholder sentences (the original gist never defines `sentences`).
sentences = ["This is an example sentence", "Each sentence is converted"]

start = time.time()
output, attention_mask = run_inference(sentences)
model_output = torch.Tensor(output)  # wrap the ONNX output list; mean_pooling reads model_output[0]
# Perform pooling
sentence_embeddings = mean_pooling(model_output, attention_mask)
# Normalize embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
# Cosine similarity between the two sentence embeddings
cosine_scores = util.pytorch_cos_sim(sentence_embeddings[0], sentence_embeddings[1])
print(cosine_scores)
end = time.time()
print(end - start)
print(f"onnx cpu: {(end- start)/2:.2f}s/sequence")