Skip to content

Instantly share code, notes, and snippets.

@gaborcselle
Created November 13, 2023 20:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gaborcselle/3bd5041fc4ba39e086b5207ca6ac2bcd to your computer and use it in GitHub Desktop.
Save gaborcselle/3bd5041fc4ba39e086b5207ca6ac2bcd to your computer and use it in GitHub Desktop.
diff --git a/examples/Customizing_embeddings.ipynb b/examples/Customizing_embeddings.ipynb
index 4068254..eaaa871 100644
--- a/examples/Customizing_embeddings.ipynb
+++ b/examples/Customizing_embeddings.ipynb
@@ -51,7 +51,7 @@
"from sklearn.model_selection import train_test_split # for splitting train & test data\n",
"import torch # for matrix optimization\n",
"\n",
- "from openai.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
+ "from utils.embeddings_utils import get_embedding, cosine_similarity # for embeddings\n"
]
},
{
diff --git a/examples/Multiclass_classification_for_transactions.ipynb b/examples/Multiclass_classification_for_transactions.ipynb
index 9f500c9..025318a 100644
--- a/examples/Multiclass_classification_for_transactions.ipynb
+++ b/examples/Multiclass_classification_for_transactions.ipynb
@@ -193,7 +193,7 @@
"source": [
"def request_completion(prompt):\n",
"\n",
- " completion_response = openai.Completion.create(\n",
+ " completion_response = openai.completions.create(\n",
" prompt=prompt,\n",
" temperature=0,\n",
" max_tokens=5,\n",
@@ -211,7 +211,7 @@
" prompt = prompt.replace('DESCRIPTION_TEXT',transaction['Description'])\n",
" prompt = prompt.replace('TRANSACTION_VALUE',str(transaction['Transaction value (£)']))\n",
"\n",
- " classification = request_completion(prompt)['choices'][0]['text'].replace('\\n','')\n",
+ " classification = request_completion(prompt).choices[0].text.replace('\\n','')\n",
"\n",
" return classification\n",
"\n",
@@ -916,8 +916,8 @@
"source": [
"from utils.embeddings_utils import get_embedding\n",
"\n",
- "df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, engine='text-similarity-babbage-001'))\n",
- "df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, engine='text-search-babbage-doc-001'))\n",
+ "df['babbage_similarity'] = df.combined.apply(lambda x: get_embedding(x, model='text-similarity-babbage-001'))\n",
+ "df['babbage_search'] = df.combined.apply(lambda x: get_embedding(x, model='text-search-babbage-doc-001'))\n",
"df.to_csv(embedding_path)\n"
]
},
@@ -2203,7 +2203,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.8"
+ "version": "3.11.3"
}
},
"nbformat": 4,
diff --git a/examples/Semantic_text_search_using_embeddings.ipynb b/examples/Semantic_text_search_using_embeddings.ipynb
index f6e59ff..6d3ee37 100644
--- a/examples/Semantic_text_search_using_embeddings.ipynb
+++ b/examples/Semantic_text_search_using_embeddings.ipynb
@@ -59,7 +59,7 @@
"def search_reviews(df, product_description, n=3, pprint=True):\n",
" product_embedding = get_embedding(\n",
" product_description,\n",
- " engine=\"text-embedding-ada-002\"\n",
+ " model=\"text-embedding-ada-002\"\n",
" )\n",
" df[\"similarity\"] = df.embedding.apply(lambda x: cosine_similarity(x, product_embedding))\n",
"\n",
diff --git a/examples/Zero-shot_classification_with_embeddings.ipynb b/examples/Zero-shot_classification_with_embeddings.ipynb
index 7bb331c..ebe2eca 100644
--- a/examples/Zero-shot_classification_with_embeddings.ipynb
+++ b/examples/Zero-shot_classification_with_embeddings.ipynb
@@ -93,7 +93,7 @@
" labels = ['negative', 'positive'],\n",
" model = EMBEDDING_MODEL,\n",
"):\n",
- " label_embeddings = [get_embedding(label, engine=model) for label in labels]\n",
+ " label_embeddings = [get_embedding(label, model=model) for label in labels]\n",
"\n",
" def label_score(review_embedding, label_embeddings):\n",
" return cosine_similarity(review_embedding, label_embeddings[1]) - cosine_similarity(review_embedding, label_embeddings[0])\n",
diff --git a/examples/utils/embeddings_utils.py b/examples/utils/embeddings_utils.py
index efb306d..ed39114 100644
--- a/examples/utils/embeddings_utils.py
+++ b/examples/utils/embeddings_utils.py
@@ -15,51 +15,53 @@ import pandas as pd
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
-def get_embedding(text: str, engine="text-similarity-davinci-001", **kwargs) -> List[float]:
+def get_embedding(text: str, model="text-similarity-davinci-001", **kwargs) -> List[float]:
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
- return openai.Embedding.create(input=[text], engine=engine, **kwargs)["data"][0]["embedding"]
+ response = openai.embeddings.create(input=[text], model=model, **kwargs)
+
+ return response.data[0].embedding
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embedding(
- text: str, engine="text-similarity-davinci-001", **kwargs
+ text: str, model="text-similarity-davinci-001", **kwargs
) -> List[float]:
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
- return (await openai.Embedding.acreate(input=[text], engine=engine, **kwargs))["data"][0][
+ return (await openai.embeddings.create(input=[text], model=model, **kwargs))["data"][0][
"embedding"
]
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embeddings(
- list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+ list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
) -> List[List[float]]:
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
# replace newlines, which can negatively affect performance.
list_of_text = [text.replace("\n", " ") for text in list_of_text]
- data = openai.Embedding.create(input=list_of_text, engine=engine, **kwargs).data
- return [d["embedding"] for d in data]
+ data = openai.embeddings.create(input=list_of_text, model=model, **kwargs).data
+ return [d.embedding for d in data]
@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embeddings(
- list_of_text: List[str], engine="text-similarity-babbage-001", **kwargs
+ list_of_text: List[str], model="text-similarity-babbage-001", **kwargs
) -> List[List[float]]:
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
# replace newlines, which can negatively affect performance.
list_of_text = [text.replace("\n", " ") for text in list_of_text]
- data = (await openai.Embedding.acreate(input=list_of_text, engine=engine, **kwargs)).data
- return [d["embedding"] for d in data]
+ data = (await openai.embeddings.create(input=list_of_text, model=model, **kwargs)).data
+ return [d.embedding for d in data]
def cosine_similarity(a, b):
@aifreak00
Copy link

I am getting errors with these embeddings — any clue?

@aifreak00
Copy link

It's reporting "No module named 'utils'" — the notebook must be run from the `examples/` directory so that `utils/embeddings_utils.py` is importable.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment