# abodacs/semantic-search-main.py

## semantic-search-main.py
from sentence_transformers import SentenceTransformer, util
import torch

# Semantic search demo: embed a small corpus once, then rank the corpus
# sentences against each query by cosine similarity and print the best hits.

# Load the model on CPU and save its files in the current directory.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2', device='cpu', cache_folder='./')
# Alternative: save the model in the models folder (you need to create the
# folder on your own beforehand).
# model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2', device='cpu', cache_folder='./models/')

# Corpus with example sentences
corpus = [
    'I am a boy',
    'What are you doing?',
    'Can you help me?',
    'A man is riding a horse.',
    'A woman is playing violin.',
    'A monkey is chasing after a goat',
    'The quick brown fox jumps over the lazy dog',
]

# Query sentences (deliberately in different languages than most of the corpus)
queries = ['I am in need of assistance', '我是男孩子', 'Qué estás haciendo']

# Embed the corpus a single time; every query is scored against this tensor.
corpus_embedding = model.encode(corpus, convert_to_tensor=True)

# Never request more hits than the corpus contains.
top_k = min(5, len(corpus))

for query in queries:
    query_embedding = model.encode(query, convert_to_tensor=True)

    # cos_sim returns a score matrix; row 0 holds this query's similarity
    # to each corpus sentence.
    cos_scores = util.cos_sim(query_embedding, corpus_embedding)[0]
    top_results = torch.topk(cos_scores, k=top_k)

    print("Query:", query)
    print("---------------------------")
    # topk yields (values, indices), already ordered from best to worst match.
    for score, idx in zip(top_results.values, top_results.indices):
        print(f'{round(score.item(), 3)} | {corpus[idx]}')