
@veekaybee
Created July 31, 2023 20:11
from sentence_transformers import SentenceTransformer

# A common limit for BERT & Co. is 512 word pieces, which corresponds to
# roughly 300-400 words (for English). Texts longer than this are truncated
# to the first max_seq_length word pieces.
# By default, the provided methods use a limit of 128 word pieces; longer
# inputs will be truncated.
# Runtime and memory requirements grow quadratically with the input length,
# so we'll have to experiment with this value.
# Raise the limit to 200.
model = SentenceTransformer("sentence-transformers/msmarco-distilbert-base-v3")
model.max_seq_length = 200

# corpus is assumed to be a list of document strings defined elsewhere.
corpus_embeddings = model.encode(
    corpus, show_progress_bar=True, device="cuda", convert_to_numpy=False
)
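To make the truncation behavior concrete, here is a minimal sketch of what happens to inputs longer than `max_seq_length`: the tokenizer's word pieces beyond the limit are simply dropped, so anything past the first 200 pieces never reaches the model. The `truncate_word_pieces` helper below is hypothetical, illustrating the effect rather than the library's internal implementation.

```python
def truncate_word_pieces(word_pieces, max_seq_length=200):
    """Keep only the first max_seq_length word pieces, dropping the rest.

    This mimics the truncation that model.max_seq_length enforces:
    content past the limit contributes nothing to the embedding.
    """
    return word_pieces[:max_seq_length]

# A 512-piece input (the usual BERT ceiling) is cut down to 200 pieces.
pieces = [f"tok{i}" for i in range(512)]
truncated = truncate_word_pieces(pieces)
print(len(truncated))  # 200
print(truncated[:3])   # ['tok0', 'tok1', 'tok2']
```

Because the quadratic attention cost is paid only on the kept pieces, lowering `max_seq_length` from 512 to 200 cuts both runtime and memory substantially, at the price of ignoring the tail of long documents.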