Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created March 9, 2023 20:46
Show Gist options
  • Save hsm207/84ea672d1dda1db83f80a20b4ea8a069 to your computer and use it in GitHub Desktop.
Save hsm207/84ea672d1dda1db83f80a20b4ea8a069 to your computer and use it in GitHub Desktop.
How to use the classification endpoint in weaviate
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Introduction"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook shows how to upload a dataset to Weaviate so that it can be classified with the [classification endpoint](https://weaviate.io/developers/weaviate/api/rest/classification)."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import weaviate"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Connect to Weaviate and clear the database (**warning:** `client.schema.delete_all()` wipes the entire schema of the instance):"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the credentials from the environment. Indexing os.environ (instead of\n",
"# calling os.getenv) raises a KeyError naming the missing variable, rather than\n",
"# silently handing None to the auth helper.\n",
"username = os.environ[\"WEAVIATE_USERNAME\"]\n",
"password = os.environ[\"WEAVIATE_PASSWORD\"]\n",
"\n",
"url = \"https://jinaai.weaviate.network/\"\n",
"\n",
"secret = weaviate.AuthClientPassword(username=username, password=password)\n",
"client = weaviate.Client(url, auth_client_secret=secret)\n",
"\n",
"# Start from an empty instance so the notebook can be re-run from the top.\n",
"# NOTE: this removes the whole schema of the target instance.\n",
"client.schema.delete_all()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dataset"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Make up a small dummy dataset."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"First, we create some labels in weaviate:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"labels = [\"chair\", \"standing desk\", \"tree\"]\n",
"\n",
"# Create one Label object per name and keep the UUID returned by each call.\n",
"uuids = []\n",
"for label_name in labels:\n",
"    label_object = {\"name\": label_name}\n",
"    uuids.append(\n",
"        client.data_object.create(data_object=label_object, class_name=\"Label\")\n",
"    )\n",
"\n",
"# Lookup table from label name to the UUID of its Label object.\n",
"label2uuid = dict(zip(labels, uuids))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, create a Mesh class with a cross reference to the Label class:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Schema of the Mesh class; the `label` property is a cross-reference to Label.\n",
"mesh_class = {\n",
"    \"class\": \"Mesh\",\n",
"    \"properties\": [\n",
"        {\"name\": \"name\", \"dataType\": [\"string\"]},\n",
"        {\"name\": \"label\", \"dataType\": [\"Label\"]},\n",
"        {\"name\": \"file\", \"dataType\": [\"string\"]},\n",
"        {\"name\": \"description\", \"dataType\": [\"text\"]},\n",
"    ],\n",
"}\n",
"\n",
"client.schema.create_class(mesh_class)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Upload some meshes:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Two labelled example meshes. Each record holds a label name, a 2-element\n",
"# vector, a file name and a free-text description.\n",
"meshes = [\n",
"    {\n",
"        \"label\": \"chair\",\n",
"        \"vector\": [1, 0],\n",
"        \"file\": \"chair.png\",\n",
"        \"description\": \"Experience unparalleled comfort with this ergonomically designed chair that conforms to your body's natural curves, providing support and reducing strain on your back\",\n",
"    },\n",
"    {\n",
"        \"label\": \"standing desk\",\n",
"        \"vector\": [-1, 0],\n",
"        \"file\": \"standing_desk.png\",\n",
"        \"description\": \"Indulge in the ultimate symbol of luxury with a one-of-a-kind standing desk encrusted with rare jewels and precious metals, featuring an interactive holographic display and cutting-edge technology, worth billions of dollars.\",\n",
"    },\n",
"]\n",
"\n",
"# Keep the individual names available as well.\n",
"mesh_1, mesh_2 = meshes"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"for mesh in meshes:\n",
"    # Split the record without mutating it: popping the keys would strip them\n",
"    # from the dicts in `meshes`, so re-running this cell would raise a KeyError.\n",
"    properties = {\n",
"        key: value for key, value in mesh.items() if key not in (\"vector\", \"label\")\n",
"    }\n",
"    label_uuid = label2uuid[mesh[\"label\"]]\n",
"\n",
"    # The vector is passed separately from the object's properties.\n",
"    mesh_uuid = client.data_object.create(\n",
"        data_object=properties, class_name=\"Mesh\", vector=mesh[\"vector\"]\n",
"    )\n",
"\n",
"    # Point the new Mesh object's `label` reference at the matching Label object.\n",
"    client.data_object.reference.add(\n",
"        from_uuid=mesh_uuid,\n",
"        from_property_name=\"label\",\n",
"        to_uuid=label_uuid,\n",
"        from_class_name=\"Mesh\",\n",
"        to_class_name=\"Label\",\n",
"    )"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Classification"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Upload a mesh without a label:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'5843a613-307d-4a50-b967-9052f7a6b014'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Unlabelled mesh: no `label` reference is attached to this object.\n",
"mesh_3 = {\n",
"    \"file\": \"sofa.png\",\n",
"    \"description\": \"Upgrade your living space without breaking the bank with a high-quality second hand sofa that is not only affordable but also sustainable.\",\n",
"}\n",
"vector = [1, 0.1]\n",
"\n",
"# The call's return value (the new object's UUID) is shown as the cell output.\n",
"client.data_object.create(data_object=mesh_3, class_name=\"Mesh\", vector=vector)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Make the classification request:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Schedule a zero-shot classification of Mesh.label based on Mesh.description.\n",
"# The job runs asynchronously on the server, so ask the client to block until it\n",
"# has finished; otherwise the cells below may query results that are not ready.\n",
"classification_status = (\n",
"    client.classification.schedule()\n",
"    .with_type(\"zeroshot\")\n",
"    .with_class_name(\"Mesh\")\n",
"    .with_classify_properties([\"label\"])\n",
"    .with_based_on_properties([\"description\"])\n",
"    .with_wait_for_completion()\n",
"    .do()\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Check the status of the classification job (it should report `completed` before the results are queried):"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'basedOnProperties': ['description'],\n",
" 'class': 'Mesh',\n",
" 'classifyProperties': ['label'],\n",
" 'id': 'd1fe6e11-684c-4249-81fb-bd119f60320c',\n",
" 'meta': {'completed': '2023-03-07T15:24:51.447Z',\n",
" 'count': 1,\n",
" 'countSucceeded': 1,\n",
" 'started': '2023-03-07T15:24:51.445Z'},\n",
" 'status': 'completed',\n",
" 'type': 'zeroshot'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look the job up by the id returned when it was scheduled.\n",
"classification_id = classification_status[\"id\"]\n",
"client.classification.get(classification_id)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Check the results:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'data': {'Get': {'Mesh': [{'_additional': None,\n",
" 'file': 'standing_desk.png',\n",
" 'label': [{'name': 'standing desk'}]},\n",
" {'_additional': {'classification': {'basedOn': [],\n",
" 'classifiedFields': [],\n",
" 'completed': '2023-03-07T15:24:51.447Z',\n",
" 'id': 'd1fe6e11-684c-4249-81fb-bd119f60320c'}},\n",
" 'file': 'sofa.png',\n",
" 'label': None},\n",
" {'_additional': None,\n",
" 'file': 'chair.png',\n",
" 'label': [{'name': 'chair'}]}]}}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch every Mesh with its file name, the name of its linked Label and the\n",
"# classification metadata stored on the object.\n",
"mesh_query = client.query.get(\n",
"    \"Mesh\", properties=[\"file\", \"label{... on Label{name}}\"]\n",
")\n",
"mesh_query = mesh_query.with_additional(\n",
"    {\"classification\": [\"basedOn\", \"classifiedFields\", \"completed\", \"id\"]}\n",
")\n",
"mesh_query.do()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Why does the sofa have no label? (`classifiedFields` is empty in the result above, so the zero-shot run did not assign one.)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Try kNN classification instead:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Schedule a kNN classification of Mesh.label with k=3.\n",
"# The job runs asynchronously on the server, so ask the client to block until it\n",
"# has finished; otherwise the cells below may query results that are not ready.\n",
"classification_status = (\n",
"    client.classification.schedule()\n",
"    .with_type(\"knn\")\n",
"    .with_class_name(\"Mesh\")\n",
"    .with_based_on_properties([\"description\"])\n",
"    .with_classify_properties([\"label\"])\n",
"    .with_settings({\"k\": 3})\n",
"    .with_wait_for_completion()\n",
"    .do()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'basedOnProperties': ['description'],\n",
" 'class': 'Mesh',\n",
" 'classifyProperties': ['label'],\n",
" 'id': '6f62cb92-7aaa-4545-a610-eb6ef93832da',\n",
" 'meta': {'completed': '2023-03-07T15:24:52.225Z',\n",
" 'count': 1,\n",
" 'countSucceeded': 1,\n",
" 'started': '2023-03-07T15:24:52.222Z'},\n",
" 'settings': {'k': 3},\n",
" 'status': 'completed',\n",
" 'type': 'knn'}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look the kNN job up by the id returned when it was scheduled.\n",
"classification_id = classification_status[\"id\"]\n",
"client.classification.get(classification_id)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'data': {'Get': {'Mesh': [{'_additional': None,\n",
" 'file': 'standing_desk.png',\n",
" 'label': [{'name': 'standing desk'}]},\n",
" {'_additional': {'classification': {'basedOn': [],\n",
" 'classifiedFields': ['label'],\n",
" 'completed': '2023-03-07T15:24:52.225Z',\n",
" 'id': '6f62cb92-7aaa-4545-a610-eb6ef93832da'}},\n",
" 'file': 'sofa.png',\n",
" 'label': [{'name': 'chair'}]},\n",
" {'_additional': None,\n",
" 'file': 'chair.png',\n",
" 'label': [{'name': 'chair'}]}]}}}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same query as after the zero-shot run: file name, linked Label name and the\n",
"# classification metadata of every Mesh.\n",
"mesh_query = client.query.get(\n",
"    \"Mesh\", properties=[\"file\", \"label{... on Label{name}}\"]\n",
")\n",
"mesh_query = mesh_query.with_additional(\n",
"    {\"classification\": [\"basedOn\", \"classifiedFields\", \"completed\", \"id\"]}\n",
")\n",
"mesh_query.do()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment