Skip to content

Instantly share code, notes, and snippets.

@ftnext
Last active January 28, 2024 00:10
Show Gist options
  • Save ftnext/c6bc421326b8534c8f471fbb2216da85 to your computer and use it in GitHub Desktop.
Save ftnext/c6bc421326b8534c8f471fbb2216da85 to your computer and use it in GitHub Desktop.
# Based on https://gist.github.com/ftnext/b0f4db8dc71333f7b663c4f5da9ec16f
import numpy as np
from openai import OpenAI
# Module-level API client used by get_embedding.
# NOTE(review): constructed with no arguments, so credentials are presumably
# read from the environment (e.g. OPENAI_API_KEY) — confirm.
client = OpenAI()
def get_embedding(text, model):
    """Request an embedding for ``text`` and return it as a NumPy array.

    Args:
        text: Value forwarded as ``input`` to ``client.embeddings.create``.
        model: Embedding model name forwarded as ``model``.

    Returns:
        ``np.ndarray`` built from the ``embedding`` of the first item in the
        response's ``data`` list.
    """
    # A ``dimensions=256`` argument was left disabled in the original call;
    # it is kept disabled here.
    result = client.embeddings.create(input=text, model=model)
    first_item = result.data[0]
    return np.array(first_item.embedding)
# Candidate sentences that the query sentence is compared against.
sentences = [
    "好きな食べ物は何ですか?",
    "どこにお住まいですか?",
    "朝の電車は混みますね",
    "今日は良いお天気ですね",
    "最近景気悪いですね",
]

# Embedding model used for both the candidates and the query.
model_name = "text-embedding-3-small"
# model_name = "text-embedding-3-large"

# One embedding per candidate, stacked into a single array (one row per
# sentence, assuming each embedding comes back as a flat vector).
embeddings = np.array(
    [get_embedding(candidate, model_name) for candidate in sentences]
)

# Query sentence; the commented line is an alternative query to try.
sentence = "今日は雨降らなくてよかった"
# sentence = "ハンバーガーは好きですか?"
embedding = get_embedding(sentence, model_name)

# Dot product of the query vector with every candidate row.
# NOTE(review): this equals cosine similarity only if the vectors are
# unit-length — presumably true for these API embeddings; confirm.
scores = np.dot(embedding, embeddings.T)
best_index = np.argmax(scores)

print("文:", sentence)
print("類似文:", sentences[best_index])
print("類似度:", scores)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment