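# ---------------------------------------------------------------------------
# Script 1: sanity-check that three ways of running the Alibaba gte-Qwen
# embedding models (SentenceTransformer, raw transformers AutoModel with
# last-token pooling, and the mteb model wrappers) produce matching similarity
# scores on a small MS MARCO-style example. Recorded outputs are kept as
# comments after each run.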
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
from sklearn.metrics import mean_squared_error
from torch import Tensor
from transformers import AutoModel, AutoTokenizer

from mteb.models.gte_models import (
    PromptType,
    gte_Qwen1_5_7B_instruct,
    gte_Qwen2_1_5B_instruct,
    gte_Qwen2_7B_instruct,
)


def encode_with_sentence_transformer(queries, documents, model_name):
    model = SentenceTransformer(model_name, trust_remote_code=True)
    model.max_seq_length = 8192
    query_embeddings = model.encode(queries, prompt_name="query")
    document_embeddings = model.encode(documents)
    scores = (query_embeddings @ document_embeddings.T) * 100
    return scores


def encode_with_auto_model(queries, documents, model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    # `task` is the module-level instruction string defined in the __main__ block below.
    queries = [get_detailed_instruct(task, query) for query in queries]
    input_texts = queries + documents
    batch_dict = tokenizer(
        input_texts, max_length=8192, padding=True, truncation=True, return_tensors="pt"
    )
    outputs = model(**batch_dict)
    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict["attention_mask"])
    embeddings = F.normalize(embeddings, p=2, dim=1)
    # The first two rows are the query embeddings, the rest are document embeddings.
    scores = (embeddings[:2] @ embeddings[2:].T) * 100
    return scores.detach().numpy()


def encode_with_mteb(queries, documents, model):
    model = model.load_model()
    query_embeddings = model.encode(queries, task_name="MSMARCO", prompt_type=PromptType.query)
    passage_embeddings = model.encode(
        documents, task_name="MSMARCO", prompt_type=PromptType.passage
    )
    scores = (query_embeddings @ passage_embeddings.T) * 100
    return scores


def compute_scores(scores1, scores2, scores3):
    mse1_2 = mean_squared_error(scores1, scores2)
    mse1_3 = mean_squared_error(scores1, scores3)
    mse2_3 = mean_squared_error(scores2, scores3)
    return mse1_2, mse1_3, mse2_3


def print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3):
    print("Scores from SentenceTransformer model:")
    print(scores1.tolist())
    print("Scores from AutoModel:")
    print(scores2.tolist())
    print("Scores from mteb model:")
    print(scores3.tolist())
    print(f"MSE between SentenceTransformer and AutoModel: {mse1_2}")
    print(f"MSE between SentenceTransformer and mteb: {mse1_3}")
    print(f"MSE between AutoModel and mteb: {mse2_3}")
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[
            torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths
        ]


def get_detailed_instruct(task_description: str, query: str) -> str:
    return f"Instruct: {task_description}\nQuery: {query}"


if __name__ == "__main__":
    task = "Given a web search query, retrieve relevant passages that answer the query"
    queries = ["how much protein should a female eat", "summit define"]
    documents = [
        "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
        "Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments.",
    ]

    scores1 = encode_with_sentence_transformer(
        queries, documents, "Alibaba-NLP/gte-Qwen2-7B-instruct"
    )
    scores2 = encode_with_auto_model(queries, documents, "Alibaba-NLP/gte-Qwen2-7B-instruct")
    scores3 = encode_with_mteb(queries, documents, gte_Qwen2_7B_instruct)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[70.39697265625, 3.4318289756774902], [4.516181945800781, 81.91806030273438]]
    # Scores from AutoModel:
    # [[70.3969955444336, 3.4318275451660156], [4.516171455383301, 81.91804504394531]]
    # Scores from gte_Qwen2_7B_instruct model:
    # [[70.39696502685547, 3.4318320751190186], [4.516174793243408, 81.91804504394531]]
    # MSE between SentenceTransformer and AutoModel: 2.1719870346714742e-10
    # MSE between SentenceTransformer and gte_Qwen2_7B_instruct: 8.79509798323852e-11
    # MSE between AutoModel and gte_Qwen2_7B_instruct: 2.4074608973023714e-10

    scores1 = encode_with_sentence_transformer(
        queries, documents, "Alibaba-NLP/gte-Qwen1.5-7B-instruct"
    )
    scores2 = encode_with_auto_model(queries, documents, "Alibaba-NLP/gte-Qwen1.5-7B-instruct")
    scores3 = encode_with_mteb(queries, documents, gte_Qwen1_5_7B_instruct)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[70.00669860839844, 8.184866905212402], [14.6242094039917, 77.71407318115234]]
    # Scores from AutoModel:
    # [[70.00666809082031, 8.184863090515137], [14.6242036819458, 77.71405029296875]]
    # Scores from mteb model:
    # [[70.0066909790039, 8.184870719909668], [14.62420654296875, 77.71406555175781]]
    # MSE between SentenceTransformer and AutoModel: 3.7562131183221936e-10
    # MSE between SentenceTransformer and mteb: 3.4788172342814505e-11
    # MSE between AutoModel and mteb: 2.0577317627612501e-10

    scores1 = encode_with_sentence_transformer(
        queries, documents, "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
    )
    scores2 = encode_with_auto_model(queries, documents, "Alibaba-NLP/gte-Qwen2-1.5B-instruct")
    scores3 = encode_with_mteb(queries, documents, gte_Qwen2_1_5B_instruct)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[78.49691772460938, 17.042865753173828], [14.924497604370117, 75.37960815429688]]
    # Scores from AutoModel:
    # [[78.49688720703125, 17.04286766052246], [14.924491882324219, 75.37960052490234]]
    # Scores from mteb model:
    # [[78.49691772460938, 17.042871475219727], [14.924491882324219, 75.37960052490234]]
    # MSE between SentenceTransformer and AutoModel: 2.5647750589996576e-10
    # MSE between SentenceTransformer and mteb: 3.092281986027956e-11
    # MSE between AutoModel and mteb: 2.3646862246096134e-10
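

# ---------------------------------------------------------------------------
# Script 2: the same three-way comparison (SentenceTransformer vs. raw
# AutoModel with last-token pooling vs. the mteb wrapper) for
# Linq-AI-Research/Linq-Embed-Mistral on a Korean/English retrieval example.
# Recorded outputs are kept as comments at the end of the script.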
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
from sklearn.metrics import mean_squared_error
from torch import Tensor
from transformers import AutoModel, AutoTokenizer

from mteb.models.linq_models import PromptType, Linq_Embed_Mistral


def encode_with_sentence_transformer(queries, documents, task, model_name):
    prompt = f"Instruct: {task}\nQuery: "
    model = SentenceTransformer(model_name, trust_remote_code=True)
    query_embeddings = model.encode(queries, prompt=prompt)
    passage_embeddings = model.encode(documents)
    scores = model.similarity(query_embeddings, passage_embeddings) * 100
    return scores


def encode_with_auto_model(queries, documents, task, model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    queries = [get_detailed_instruct(task, query) for query in queries]
    input_texts = queries + documents
    batch_dict = tokenizer(
        input_texts, max_length=4096, padding=True, truncation=True, return_tensors="pt"
    )
    outputs = model(**batch_dict)
    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict["attention_mask"])
    embeddings = F.normalize(embeddings, p=2, dim=1)
    scores = (embeddings[:2] @ embeddings[2:].T) * 100
    return scores.detach().numpy()


def encode_with_mteb(queries, documents, model):
    model = model.load_model()
    query_embeddings = model.encode(
        queries, task_name="MIRACLRetrieval", prompt_type=PromptType.query
    )
    passage_embeddings = model.encode(
        documents, task_name="MIRACLRetrieval", prompt_type=PromptType.passage
    )
    scores = (query_embeddings @ passage_embeddings.T) * 100
    return scores


def compute_scores(scores1, scores2, scores3):
    mse1_2 = mean_squared_error(scores1, scores2)
    mse1_3 = mean_squared_error(scores1, scores3)
    mse2_3 = mean_squared_error(scores2, scores3)
    return mse1_2, mse1_3, mse2_3


def print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3):
    print("Scores from SentenceTransformer model:")
    print(scores1.tolist())
    print("Scores from AutoModel:")
    print(scores2.tolist())
    print("Scores from mteb model:")
    print(scores3.tolist())
    print(f"MSE between SentenceTransformer and AutoModel: {mse1_2}")
    print(f"MSE between SentenceTransformer and mteb: {mse1_3}")
    print(f"MSE between AutoModel and mteb: {mse2_3}")


def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[
            torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths
        ]


def get_detailed_instruct(task_description: str, query: str) -> str:
    return f"Instruct: {task_description}\nQuery: {query}"


if __name__ == "__main__":
    task = "Given a question, retrieve Wikipedia passages that answer the question"
    queries = [
        get_detailed_instruct(task, "최초의 원자력 발전소는 무엇인가?"),
        get_detailed_instruct(task, "Who invented Hangul?"),
    ]
    # No need to add instruction for retrieval documents
    passages = [
        "현재 사용되는 핵분열 방식을 이용한 전력생산은 1948년 9월 미국 테네시주 오크리지에 설치된 X-10 흑연원자로에서 전구의 불을 밝히는 데 사용되면서 시작되었다. 그리고 1954년 6월에 구소련의 오브닌스크에 건설된 흑연감속 비등경수 압력관형 원자로를 사용한 오브닌스크 원자력 발전소가 시험적으로 전력생산을 시작하였고, 최초의 상업용 원자력 엉더이로를 사용한 영국 셀라필드 원자력 단지에 위치한 콜더 홀(Calder Hall) 원자력 발전소로, 1956년 10월 17일 상업 운전을 시작하였다.",
        "Hangul was personally created and promulgated by the fourth king of the Joseon dynasty, Sejong the Great.[1][2] Sejong's scholarly institute, the Hall of Worthies, is often credited with the work, and at least one of its scholars was heavily involved in its creation, but it appears to have also been a personal project of Sejong.",
    ]

    scores1 = encode_with_sentence_transformer(
        queries, passages, task, "Linq-AI-Research/Linq-Embed-Mistral"
    )
    scores2 = encode_with_auto_model(
        queries, passages, task, "Linq-AI-Research/Linq-Embed-Mistral"
    )
    scores3 = encode_with_mteb(queries, passages, Linq_Embed_Mistral)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[73.65106201171875, 30.973318099975586], [29.315975189208984, 78.59217834472656]]
    # Scores from AutoModel:
    # [[73.65103912353516, 30.97328758239746], [29.315948486328125, 78.59219360351562]]
    # Scores from mteb model:
    # [[73.65101623535156, 30.97330093383789], [29.315954208374023, 78.59220123291016]]
    # MSE between SentenceTransformer and AutoModel: 6.002665031701326e-10
    # MSE between SentenceTransformer and mteb: 8.385541150346398e-10
    # MSE between AutoModel and mteb: 1.9826984498649836e-10
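

# ---------------------------------------------------------------------------
# Script 3: the same three-way comparison for nvidia/NV-Embed-v2 and
# NV-Embed-v1. Here the raw-transformers path uses NV-Embed's own remote-code
# encode() (instruction + max_length) instead of manual last-token pooling.
# Recorded outputs are kept as comments after each run.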
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
from sklearn.metrics import mean_squared_error
from torch import Tensor
from transformers import AutoModel, AutoTokenizer

from mteb.models.nvidia_models import PromptType, NV_embed_v2, NV_embed_v1


def encode_with_sentence_transformer(queries, documents, task, model_name):
    prompt = f"Instruct: {task}\nQuery: "
    model = SentenceTransformer(model_name, trust_remote_code=True)
    model.max_seq_length = 32768
    model.tokenizer.padding_side = "right"

    def add_eos(input_examples):
        # NV-Embed expects an explicit EOS token appended to every input text.
        input_examples = [
            input_example + model.tokenizer.eos_token for input_example in input_examples
        ]
        return input_examples

    batch_size = 2
    query_embeddings = model.encode(
        add_eos(queries), batch_size=batch_size, prompt=prompt, normalize_embeddings=True
    )
    passage_embeddings = model.encode(
        add_eos(documents), batch_size=batch_size, prompt="", normalize_embeddings=True
    )
    scores = (query_embeddings @ passage_embeddings.T) * 100
    return scores
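

# NV-Embed's remote-code AutoModel exposes its own encode() that takes the
# instruction prompt and max_length directly, so no manual pooling is needed here.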
def encode_with_auto_model(queries, documents, task, model_name):
    prompt = f"Instruct: {task}\nQuery: "
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
    max_length = 32768
    query_embeddings = model.encode(queries, instruction=prompt, max_length=max_length)
    passage_embeddings = model.encode(documents, instruction="", max_length=max_length)
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)
    scores = (query_embeddings @ passage_embeddings.T) * 100
    return scores.detach().numpy()


def encode_with_mteb(queries, documents, task_name, model):
    model = model.load_model()
    query_embeddings = model.encode(queries, task_name=task_name, prompt_type=PromptType.query)
    passage_embeddings = model.encode(
        documents, task_name=task_name, prompt_type=PromptType.passage
    )
    scores = (query_embeddings @ passage_embeddings.T) * 100
    return scores


def compute_scores(scores1, scores2, scores3):
    mse1_2 = mean_squared_error(scores1, scores2)
    mse1_3 = mean_squared_error(scores1, scores3)
    mse2_3 = mean_squared_error(scores2, scores3)
    return mse1_2, mse1_3, mse2_3


def print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3):
    print("Scores from SentenceTransformer model:")
    print(scores1.tolist())
    print("Scores from AutoModel:")
    print(scores2.tolist())
    print("Scores from mteb model:")
    print(scores3.tolist())
    print(f"MSE between SentenceTransformer and AutoModel: {mse1_2}")
    print(f"MSE between SentenceTransformer and mteb: {mse1_3}")
    print(f"MSE between AutoModel and mteb: {mse2_3}")


if __name__ == "__main__":
    task = "Given a question, retrieve passages that answer the question"
    queries = ["최초의 원자력 발전소는 무엇인가?", "Who invented Hangul?"]
    # No need to add instruction for retrieval documents
    passages = [
        "현재 사용되는 핵분열 방식을 이용한 전력생산은 1948년 9월 미국 테네시주 오크리지에 설치된 X-10 흑연원자로에서 전구의 불을 밝히는 데 사용되면서 시작되었다. 그리고 1954년 6월에 구소련의 오브닌스크에 건설된 흑연감속 비등경수 압력관형 원자로를 사용한 오브닌스크 원자력 발전소가 시험적으로 전력생산을 시작하였고, 최초의 상업용 원자력 엉더이로를 사용한 영국 셀라필드 원자력 단지에 위치한 콜더 홀(Calder Hall) 원자력 발전소로, 1956년 10월 17일 상업 운전을 시작하였다.",
        "Hangul was personally created and promulgated by the fourth king of the Joseon dynasty, Sejong the Great.[1][2] Sejong's scholarly institute, the Hall of Worthies, is often credited with the work, and at least one of its scholars was heavily involved in its creation, but it appears to have also been a personal project of Sejong.",
    ]

    scores1 = encode_with_sentence_transformer(queries, passages, task, "nvidia/NV-Embed-v2")
    scores2 = encode_with_auto_model(queries, passages, task, "nvidia/NV-Embed-v2")
    scores3 = encode_with_mteb(queries, passages, "MIRACLReranking", NV_embed_v2)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[74.80958557128906, 8.955424308776855], [11.394946098327637, 73.29377746582031]]
    # Scores from AutoModel:
    # [[75.59257507324219, 8.993165016174316], [10.881285667419434, 72.11418151855469]]
    # Scores from mteb model:
    # [[74.80958557128906, 8.955424308776855], [11.394946098327637, 73.29377746582031]]
    # MSE between SentenceTransformer and AutoModel: 0.5674476623535156
    # MSE between SentenceTransformer and mteb: 0.0
    # MSE between AutoModel and mteb: 0.5674476623535156

    scores1 = encode_with_sentence_transformer(queries, passages, task, "nvidia/NV-Embed-v1")
    scores2 = encode_with_auto_model(queries, passages, task, "nvidia/NV-Embed-v1")
    scores3 = encode_with_mteb(queries, passages, "MIRACLReranking", NV_embed_v1)
    mse1_2, mse1_3, mse2_3 = compute_scores(scores1, scores2, scores3)
    print_report(scores1, scores2, scores3, mse1_2, mse1_3, mse2_3)
    # Scores from SentenceTransformer model:
    # [[61.517578125, 4.365936756134033], [6.746277809143066, 73.04427337646484]]
    # Scores from AutoModel:
    # [[62.26088333129883, 4.267183780670166], [6.847907543182373, 73.00653076171875]]
    # Scores from mteb model:
    # [[61.517578125, 4.365936756134033], [6.746277809143066, 73.04427337646484]]
    # MSE between SentenceTransformer and AutoModel: 0.14350196719169617
    # MSE between SentenceTransformer and mteb: 0.0
    # MSE between AutoModel and mteb: 0.14350196719169617