Created
January 20, 2023 13:02
-
-
Save hsm207/5faad0d2472f78c4226d118f4bc2bd5e to your computer and use it in GitHub Desktop.
davinci-003 vs ada-002
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import weaviate\n", | |
"import uuid\n", | |
"import os\n" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"client = weaviate.Client(\"https://foo.weaviate.network\",\n", | |
" additional_headers={\"X-OpenAI-Api-Key\": os.environ[\"OPENAI_APIKEY\"]})\n", | |
"client.schema.delete_all()\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Data" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the `Document` class, then upload three paraphrased questions to Weaviate:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# with davinci 003\n", | |
"doc_class_schema = {\n", | |
" \"class\": \"Document\",\n", | |
" \"description\": \"A class called document\",\n", | |
" \"vectorizer\": \"text2vec-openai\",\n", | |
" \"moduleConfig\": {\n", | |
" \"text2vec-openai\": {\n", | |
" \"model\": \"davinci\",\n", | |
" \"modelVersion\": \"003\",\n", | |
" \"type\": \"text\"\n", | |
" }\n", | |
" },\n", | |
" \"properties\": [\n", | |
" {\n", | |
" \"dataType\": [\n", | |
" \"text\"\n", | |
" ],\n", | |
" \"description\": \"Content that will be vectorized\",\n", | |
" \"moduleConfig\": {\n", | |
" \"text2vec-openai\": {\n", | |
" \"skip\": \"false\",\n", | |
" \"vectorizePropertyName\": \"false\"\n", | |
" }\n", | |
" },\n", | |
" \"name\": \"content\"\n", | |
" }\n", | |
" ]\n", | |
"}\n", | |
"\n", | |
"# with ada 002\n", | |
"# doc_class_schema = {\n", | |
"# \"class\": \"Document\",\n", | |
"# \"description\": \"A class called document\",\n", | |
"# \"vectorizer\": \"text2vec-openai\",\n", | |
"# \"moduleConfig\": {\n", | |
"# \"text2vec-openai\": {\n", | |
"# \"model\": \"ada\",\n", | |
"# \"modelVersion\": \"002\",\n", | |
"# \"type\": \"text\"\n", | |
"# }\n", | |
"# },\n", | |
"# \"properties\": [\n", | |
"# {\n", | |
"# \"dataType\": [\n", | |
"# \"text\"\n", | |
"# ],\n", | |
"# \"description\": \"Content that will be vectorized\",\n", | |
"# \"moduleConfig\": {\n", | |
"# \"text2vec-openai\": {\n", | |
"# \"skip\": \"false\",\n", | |
"# \"vectorizePropertyName\": \"false\"\n", | |
"# }\n", | |
"# },\n", | |
"# \"name\": \"content\"\n", | |
"# }\n", | |
"# ]\n", | |
"# }\n", | |
"\n", | |
"client.schema.create_class(doc_class_schema)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"client.batch(batch_size=10, dynamic=True, num_workers=1)\n", | |
"\n", | |
"with client.batch as batch:\n", | |
" batch.add_data_object(\n", | |
" data_object={\n", | |
" \"content\": \"What accounting software do we use?\",\n", | |
" },\n", | |
" class_name=\"Document\"\n", | |
" )\n", | |
"\n", | |
" batch.add_data_object(\n", | |
" data_object={\n", | |
" \"content\": \"What software do we use for accounting?\",\n", | |
" },\n", | |
" class_name=\"Document\"\n", | |
" )\n", | |
"\n", | |
" batch.add_data_object(\n", | |
" data_object={\n", | |
" \"content\": \"What accounting software does the company use?\",\n", | |
" },\n", | |
" class_name=\"Document\"\n", | |
" )\n" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Queries" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get vector for \"What accounting software do we use?\":" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"where_filter = {\n", | |
" \"path\": [\"content\"],\n", | |
" \"operator\": \"Equal\",\n", | |
" \"valueText\": \"What accounting software do we use?\"\n", | |
"}\n", | |
"\n", | |
"query_result = (\n", | |
" client.query\n", | |
" .get(\"Document\", \"content\")\n", | |
" .with_where(where_filter)\n", | |
" .with_additional([\"id\"])\n", | |
" .do()\n", | |
")\n", | |
"\n", | |
"uuid = query_result[\"data\"][\"Get\"][\"Document\"][0][\"_additional\"][\"id\"]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[-0.005881898,\n", | |
" 0.01804839,\n", | |
" 0.0030804658,\n", | |
" -0.015143907,\n", | |
" 0.011484764,\n", | |
" -0.0017613986,\n", | |
" -0.013659956,\n", | |
" -0.0007653602,\n", | |
" -0.01143403,\n", | |
" 0.0058914106]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"where_filter = {\n", | |
" \"path\": [\"id\"],\n", | |
" \"operator\": \"Equal\",\n", | |
" \"valueString\": uuid\n", | |
"}\n", | |
"\n", | |
"query_result = (\n", | |
" client.query\n", | |
" .get(\"Document\", \"content\")\n", | |
" .with_where(where_filter)\n", | |
" .with_additional([\"vector\"])\n", | |
" .do()\n", | |
")\n", | |
"\n", | |
"vector = query_result[\"data\"][\"Get\"][\"Document\"][0][\"_additional\"][\"vector\"]\n", | |
"vector[:10]\n" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Find documents near `vector`:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'_additional': {'distance': -4.7683716e-07}, 'content': 'What accounting software do we use?'}\n", | |
"{'_additional': {'distance': 0.028079808}, 'content': 'What software do we use for accounting?'}\n", | |
"{'_additional': {'distance': 0.052015245}, 'content': 'What accounting software does the company use?'}\n" | |
] | |
} | |
], | |
"source": [ | |
"nearVector = {\n", | |
" 'vector': vector\n", | |
"}\n", | |
"\n", | |
"result = (\n", | |
" client.query\n", | |
" .get(\"Document\", \"content\")\n", | |
" .with_additional('distance')\n", | |
" .with_near_vector(nearVector)\n", | |
" .do()\n", | |
")\n", | |
"\n", | |
"docs = result[\"data\"][\"Get\"][\"Document\"]\n", | |
"\n", | |
"for doc in docs:\n", | |
" print(doc)\n" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Find documents near \"What software do we use for accounting?\":" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nearText = {\n", | |
" \"concepts\": [\"What software do we use for accounting\"],\n", | |
" \"distance\": 0.6\n", | |
"}\n", | |
"\n", | |
"result = (\n", | |
" client.query\n", | |
" .get(\"Document\", [\"content\", \"_additional {distance} \"])\n", | |
" .with_near_text(nearText)\n", | |
" .do()\n", | |
")\n", | |
"\n", | |
"docs = result[\"data\"][\"Get\"][\"Document\"]\n", | |
"\n", | |
"for doc in docs:\n", | |
" print(doc)\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.16" | |
}, | |
"vscode": { | |
"interpreter": { | |
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment