Created
March 9, 2023 20:46
-
-
Save hsm207/84ea672d1dda1db83f80a20b4ea8a069 to your computer and use it in GitHub Desktop.
How to use the classification endpoint in weaviate
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Introduction" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This notebook shows how to upload a dataset to Weaviate and then classify its unlabeled objects with the [classification endpoint](https://weaviate.io/developers/weaviate/api/rest/classification)." | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Imports" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import weaviate" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Setup" | |
] | |
}, | |
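{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Connect to Weaviate and clear the database. **Warning:** `client.schema.delete_all()` removes every class, and all objects stored in them, from the instance:" | |
] | |
}, | |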
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"username = os.getenv(\"WEAVIATE_USERNAME\")\n", | |
"password = os.getenv(\"WEAVIATE_PASSWORD\")\n", | |
"\n", | |
"url = \"https://jinaai.weaviate.network/\"\n", | |
"\n", | |
"secret = weaviate.AuthClientPassword(username=username, password=password)\n", | |
"\n", | |
"client = weaviate.Client(url, auth_client_secret=secret)\n", | |
"\n", | |
"client.schema.delete_all()" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Dataset" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Make up a small dummy dataset." | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"First, we create some labels in weaviate:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"labels = [\"chair\", \"standing desk\", \"tree\"]\n", | |
"\n", | |
"\n", | |
"uuids = [\n", | |
" client.data_object.create(data_object={\"name\": label}, class_name=\"Label\")\n", | |
" for label in labels\n", | |
"]\n", | |
"\n", | |
"label2uuid = {label: uuid for label, uuid in zip(labels, uuids)}" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Next, create a Mesh class with a cross reference to the Label class:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"client.schema.create_class(\n", | |
" {\n", | |
" \"class\": \"Mesh\",\n", | |
" \"properties\": [\n", | |
" {\"name\": \"name\", \"dataType\": [\"string\"]},\n", | |
" {\"name\": \"label\", \"dataType\": [\"Label\"]},\n", | |
" {\"name\": \"file\", \"dataType\": [\"string\"]},\n", | |
" {\"name\": \"description\", \"dataType\": [\"text\"]},\n", | |
" ],\n", | |
" }\n", | |
")" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Define some labeled meshes, then upload each one with its vector and link it to its label:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mesh_1 = {\n", | |
" \"label\": \"chair\",\n", | |
" \"vector\": [1, 0],\n", | |
" \"file\": \"chair.png\",\n", | |
" \"description\": \"Experience unparalleled comfort with this ergonomically designed chair that conforms to your body's natural curves, providing support and reducing strain on your back\",\n", | |
"}\n", | |
"\n", | |
"mesh_2 = {\n", | |
" \"label\": \"standing desk\",\n", | |
" \"vector\": [-1, 0],\n", | |
" \"file\": \"standing_desk.png\",\n", | |
" \"description\": \"Indulge in the ultimate symbol of luxury with a one-of-a-kind standing desk encrusted with rare jewels and precious metals, featuring an interactive holographic display and cutting-edge technology, worth billions of dollars.\",\n", | |
"}\n", | |
"\n", | |
"meshes = [mesh_1, mesh_2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for mesh in meshes:\n", | |
" vector = mesh.pop(\"vector\")\n", | |
" label = mesh.pop(\"label\")\n", | |
"\n", | |
" label_uuid = label2uuid[label]\n", | |
"\n", | |
" mesh_uuid = client.data_object.create(\n", | |
" data_object=mesh, class_name=\"Mesh\", vector=vector\n", | |
" )\n", | |
"\n", | |
" client.data_object.reference.add(\n", | |
" from_uuid=mesh_uuid,\n", | |
" from_property_name=\"label\",\n", | |
" to_uuid=label_uuid,\n", | |
" from_class_name=\"Mesh\",\n", | |
" to_class_name=\"Label\",\n", | |
" )" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Classification" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Upload a mesh without a label:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'5843a613-307d-4a50-b967-9052f7a6b014'" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mesh_3 = {\n", | |
" \"vector\": [1, 0.1],\n", | |
" \"file\": \"sofa.png\",\n", | |
" \"description\": \"Upgrade your living space without breaking the bank with a high-quality second hand sofa that is not only affordable but also sustainable.\",\n", | |
"}\n", | |
"\n", | |
"vector = mesh_3.pop(\"vector\")\n", | |
"\n", | |
"client.data_object.create(data_object=mesh_3, class_name=\"Mesh\", vector=vector)" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Make the classification request:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"classification_status = (\n", | |
" client.classification.schedule()\n", | |
" .with_type(\"zeroshot\")\n", | |
" .with_class_name(\"Mesh\")\n", | |
" .with_classify_properties([\"label\"])\n", | |
" .with_based_on_properties([\"description\"])\n", | |
" .do()\n", | |
")" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Check the classification status (this fetches the status once; re-run the cell until `status` is `completed`):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'basedOnProperties': ['description'],\n", | |
" 'class': 'Mesh',\n", | |
" 'classifyProperties': ['label'],\n", | |
" 'id': 'd1fe6e11-684c-4249-81fb-bd119f60320c',\n", | |
" 'meta': {'completed': '2023-03-07T15:24:51.447Z',\n", | |
" 'count': 1,\n", | |
" 'countSucceeded': 1,\n", | |
" 'started': '2023-03-07T15:24:51.445Z'},\n", | |
" 'status': 'completed',\n", | |
" 'type': 'zeroshot'}" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.classification.get(classification_status[\"id\"])" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Check the results:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Mesh': [{'_additional': None,\n", | |
" 'file': 'standing_desk.png',\n", | |
" 'label': [{'name': 'standing desk'}]},\n", | |
" {'_additional': {'classification': {'basedOn': [],\n", | |
" 'classifiedFields': [],\n", | |
" 'completed': '2023-03-07T15:24:51.447Z',\n", | |
" 'id': 'd1fe6e11-684c-4249-81fb-bd119f60320c'}},\n", | |
" 'file': 'sofa.png',\n", | |
" 'label': None},\n", | |
" {'_additional': None,\n", | |
" 'file': 'chair.png',\n", | |
" 'label': [{'name': 'chair'}]}]}}}" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.query.get(\n", | |
" \"Mesh\", properties=[\"file\", \"label{... on Label{name}}\"]\n", | |
").with_additional(\n", | |
" {\"classification\": [\"basedOn\", \"classifiedFields\", \"completed\", \"id\"]}\n", | |
").do()" | |
] | |
}, | |
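{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Why does the sofa have no label? The zero-shot classification completed, but `classifiedFields` is empty and `label` is still `None`." | |
] | |
}, | |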
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Try kNN classification instead:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"classification_status = (\n", | |
" client.classification.schedule()\n", | |
" .with_type(\"knn\")\n", | |
" .with_class_name(\"Mesh\")\n", | |
" .with_based_on_properties([\"description\"])\n", | |
" .with_classify_properties([\"label\"])\n", | |
" .with_settings({\"k\": 3})\n", | |
" .do()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'basedOnProperties': ['description'],\n", | |
" 'class': 'Mesh',\n", | |
" 'classifyProperties': ['label'],\n", | |
" 'id': '6f62cb92-7aaa-4545-a610-eb6ef93832da',\n", | |
" 'meta': {'completed': '2023-03-07T15:24:52.225Z',\n", | |
" 'count': 1,\n", | |
" 'countSucceeded': 1,\n", | |
" 'started': '2023-03-07T15:24:52.222Z'},\n", | |
" 'settings': {'k': 3},\n", | |
" 'status': 'completed',\n", | |
" 'type': 'knn'}" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.classification.get(classification_status[\"id\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Mesh': [{'_additional': None,\n", | |
" 'file': 'standing_desk.png',\n", | |
" 'label': [{'name': 'standing desk'}]},\n", | |
" {'_additional': {'classification': {'basedOn': [],\n", | |
" 'classifiedFields': ['label'],\n", | |
" 'completed': '2023-03-07T15:24:52.225Z',\n", | |
" 'id': '6f62cb92-7aaa-4545-a610-eb6ef93832da'}},\n", | |
" 'file': 'sofa.png',\n", | |
" 'label': [{'name': 'chair'}]},\n", | |
" {'_additional': None,\n", | |
" 'file': 'chair.png',\n", | |
" 'label': [{'name': 'chair'}]}]}}}" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.query.get(\n", | |
" \"Mesh\", properties=[\"file\", \"label{... on Label{name}}\"]\n", | |
").with_additional(\n", | |
" {\"classification\": [\"basedOn\", \"classifiedFields\", \"completed\", \"id\"]}\n", | |
").do()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.2" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment