Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created January 20, 2023 13:02
Show Gist options
  • Save hsm207/5faad0d2472f78c4226d118f4bc2bd5e to your computer and use it in GitHub Desktop.
Save hsm207/5faad0d2472f78c4226d118f4bc2bd5e to your computer and use it in GitHub Desktop.
davinci-003 vs ada-002
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import weaviate\n",
"import uuid\n",
"import os\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Client"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# The OpenAI key is read from the environment and sent along as a request\n",
"# header, so it never has to be written into the notebook itself.\n",
"openai_headers = {\"X-OpenAI-Api-Key\": os.environ[\"OPENAI_APIKEY\"]}\n",
"\n",
"client = weaviate.Client(\n",
"    \"https://foo.weaviate.network\",\n",
"    additional_headers=openai_headers,\n",
")\n",
"\n",
"# Clear the existing schema so every run starts from the same state.\n",
"client.schema.delete_all()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the `Document` class, then upload a few sample questions to Weaviate:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Embedding model under test. This notebook compares two configurations;\n",
"# change this pair and re-run from the top to try the other one:\n",
"#   (\"davinci\", \"003\")  or  (\"ada\", \"002\")\n",
"EMBEDDING_MODEL, EMBEDDING_MODEL_VERSION = \"davinci\", \"003\"\n",
"\n",
"\n",
"def build_doc_class_schema(model, model_version):\n",
"    \"\"\"Build the `Document` class schema for one text2vec-openai model.\n",
"\n",
"    Args:\n",
"        model: Value for the module's `model` setting, e.g. \"davinci\".\n",
"        model_version: Value for the `modelVersion` setting, e.g. \"003\".\n",
"\n",
"    Returns:\n",
"        A schema dict with a single text property named `content`.\n",
"    \"\"\"\n",
"    return {\n",
"        \"class\": \"Document\",\n",
"        \"description\": \"A class called document\",\n",
"        \"vectorizer\": \"text2vec-openai\",\n",
"        \"moduleConfig\": {\n",
"            \"text2vec-openai\": {\n",
"                \"model\": model,\n",
"                \"modelVersion\": model_version,\n",
"                \"type\": \"text\",\n",
"            }\n",
"        },\n",
"        \"properties\": [\n",
"            {\n",
"                \"dataType\": [\"text\"],\n",
"                \"description\": \"Content that will be vectorized\",\n",
"                \"moduleConfig\": {\n",
"                    \"text2vec-openai\": {\n",
"                        # Booleans, not the strings \"false\": a non-empty string\n",
"                        # is not a boolean once the schema is sent as JSON.\n",
"                        \"skip\": False,\n",
"                        \"vectorizePropertyName\": False,\n",
"                    }\n",
"                },\n",
"                \"name\": \"content\",\n",
"            }\n",
"        ],\n",
"    }\n",
"\n",
"\n",
"doc_class_schema = build_doc_class_schema(EMBEDDING_MODEL, EMBEDDING_MODEL_VERSION)\n",
"client.schema.create_class(doc_class_schema)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Set the batching options before opening the batch context below.\n",
"client.batch(batch_size=10, dynamic=True, num_workers=1)\n",
"\n",
"# Three phrasings of the same question, added in this order.\n",
"sample_questions = (\n",
"    \"What accounting software do we use?\",\n",
"    \"What software do we use for accounting?\",\n",
"    \"What accounting software does the company use?\",\n",
")\n",
"\n",
"with client.batch as batch:\n",
"    for question in sample_questions:\n",
"        batch.add_data_object(\n",
"            data_object={\"content\": question},\n",
"            class_name=\"Document\",\n",
"        )\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Queries"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Look up the object whose content is \"What accounting software do we use?\" and fetch its vector:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Step 1: find the object by its exact content and read back its id.\n",
"content_filter = {\n",
"    \"path\": [\"content\"],\n",
"    \"operator\": \"Equal\",\n",
"    \"valueText\": \"What accounting software do we use?\",\n",
"}\n",
"\n",
"id_lookup = (\n",
"    client.query\n",
"    .get(\"Document\", \"content\")\n",
"    .with_where(content_filter)\n",
"    .with_additional([\"id\"])\n",
"    .do()\n",
")\n",
"\n",
"matches = id_lookup[\"data\"][\"Get\"][\"Document\"]\n",
"# Fail with a readable message instead of an IndexError on an empty result.\n",
"assert matches, \"no Document matched the content filter - did the batch upload succeed?\"\n",
"\n",
"# Called `doc_id` rather than `uuid` so the imported `uuid` module is not shadowed.\n",
"doc_id = matches[0][\"_additional\"][\"id\"]\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.005881898,\n",
" 0.01804839,\n",
" 0.0030804658,\n",
" -0.015143907,\n",
" 0.011484764,\n",
" -0.0017613986,\n",
" -0.013659956,\n",
" -0.0007653602,\n",
" -0.01143403,\n",
" 0.0058914106]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Step 2: fetch the same object by id, this time asking for its vector.\n",
"id_filter = {\n",
"    \"path\": [\"id\"],\n",
"    \"operator\": \"Equal\",\n",
"    \"valueString\": doc_id,\n",
"}\n",
"\n",
"vector_lookup = (\n",
"    client.query\n",
"    .get(\"Document\", \"content\")\n",
"    .with_where(id_filter)\n",
"    .with_additional([\"vector\"])\n",
"    .do()\n",
")\n",
"\n",
"vector = vector_lookup[\"data\"][\"Get\"][\"Document\"][0][\"_additional\"][\"vector\"]\n",
"# Show only the first few components; the full vector is long.\n",
"vector[:10]\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Find documents near `vector`:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_additional': {'distance': -4.7683716e-07}, 'content': 'What accounting software do we use?'}\n",
"{'_additional': {'distance': 0.028079808}, 'content': 'What software do we use for accounting?'}\n",
"{'_additional': {'distance': 0.052015245}, 'content': 'What accounting software does the company use?'}\n"
]
}
],
"source": [
"# Search with the vector fetched in the previous cell rather than with text.\n",
"near_vector = {\"vector\": vector}\n",
"\n",
"near_vector_query = (\n",
"    client.query\n",
"    .get(\"Document\", \"content\")\n",
"    .with_additional(\"distance\")\n",
"    .with_near_vector(near_vector)\n",
")\n",
"result = near_vector_query.do()\n",
"\n",
"# One printed line per returned document, including its distance.\n",
"docs = result[\"data\"][\"Get\"][\"Document\"]\n",
"for doc in docs:\n",
"    print(doc)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Find documents within distance 0.6 of the text \"What software do we use for accounting\":"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Text query plus the `distance` value passed to nearText.\n",
"near_text = {\n",
"    \"concepts\": [\"What software do we use for accounting\"],\n",
"    \"distance\": 0.6,\n",
"}\n",
"\n",
"result = (\n",
"    client.query\n",
"    .get(\"Document\", [\"content\"])\n",
"    # Ask for the distance through the query builder, like the near-vector\n",
"    # query, rather than splicing a raw `_additional {distance}` fragment\n",
"    # into the property list.\n",
"    .with_additional(\"distance\")\n",
"    .with_near_text(near_text)\n",
"    .do()\n",
")\n",
"\n",
"# One printed line per returned document, including its distance.\n",
"docs = result[\"data\"][\"Get\"][\"Document\"]\n",
"for doc in docs:\n",
"    print(doc)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
},
"vscode": {
"interpreter": {
"hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment