Last active
September 4, 2023 21:12
-
-
Save dudanogueira/5e6b35cb6af3a28e93105b0d5fefb1dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "64406e3b-95eb-4541-9c77-80817d9bfc9b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import weaviate\n", | |
"\n", | |
"# Open a client against the Weaviate instance listening on localhost:8080.\n", | |
"client = weaviate.Client(url=\"http://localhost:8080\")\n", | |
"\n", | |
"# Last expression of the cell, so the readiness flag is displayed as the cell output.\n", | |
"client.is_ready()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "6c11210f-2546-40c5-8be9-e31698b074db", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Reset the schema first so the class below can be created again on every run.\n", | |
"# NOTE(review): delete_all() is expected to drop every class in the instance,\n", | |
"# not just 'Label' -- confirm before pointing this notebook at shared data.\n", | |
"client.schema.delete_all()\n", | |
"\n", | |
"# The single property the class starts with ('skip': False for text2vec-openai).\n", | |
"content_property = {\n", | |
"    'dataType': ['text'],\n", | |
"    'description': 'Content of the label',\n", | |
"    'indexFilterable': True,\n", | |
"    'indexSearchable': True,\n", | |
"    'moduleConfig': {\n", | |
"        'text2vec-openai': {'skip': False, 'vectorizePropertyName': False},\n", | |
"    },\n", | |
"    'name': 'content',\n", | |
"    'tokenization': 'field',\n", | |
"}\n", | |
"\n", | |
"# Settings for the 'hnsw' vector index, including the (disabled) pq section.\n", | |
"hnsw_config = {\n", | |
"    'skip': False,\n", | |
"    'cleanupIntervalSeconds': 300,\n", | |
"    'maxConnections': 64,\n", | |
"    'efConstruction': 128,\n", | |
"    'ef': -1,\n", | |
"    'dynamicEfMin': 100,\n", | |
"    'dynamicEfMax': 500,\n", | |
"    'dynamicEfFactor': 8,\n", | |
"    'vectorCacheMaxObjects': 1000000000000,\n", | |
"    'flatSearchCutoff': 40000,\n", | |
"    'distance': 'cosine',\n", | |
"    'pq': {\n", | |
"        'enabled': False,\n", | |
"        'bitCompression': False,\n", | |
"        'segments': 0,\n", | |
"        'centroids': 256,\n", | |
"        'encoder': {'type': 'kmeans', 'distribution': 'log-normal'},\n", | |
"    },\n", | |
"}\n", | |
"\n", | |
"# Sharding settings for the class.\n", | |
"sharding_config = {\n", | |
"    'virtualPerPhysical': 128,\n", | |
"    'desiredCount': 1,\n", | |
"    'actualCount': 1,\n", | |
"    'desiredVirtualCount': 128,\n", | |
"    'actualVirtualCount': 128,\n", | |
"    'key': '_id',\n", | |
"    'strategy': 'hash',\n", | |
"    'function': 'murmur3',\n", | |
"}\n", | |
"\n", | |
"# Full definition of the 'Label' class, assembled from the pieces above.\n", | |
"class_obj = {\n", | |
"    'class': 'Label',\n", | |
"    'description': 'Labels for documents',\n", | |
"    'invertedIndexConfig': {\n", | |
"        'bm25': {'b': 0.75, 'k1': 1.2},\n", | |
"        'cleanupIntervalSeconds': 60,\n", | |
"        # indexNullState is the setting that makes null values filterable (IsNull operator)\n", | |
"        'indexNullState': True,\n", | |
"        'stopwords': {'additions': None, 'preset': 'en', 'removals': None},\n", | |
"    },\n", | |
"    'moduleConfig': {\n", | |
"        'text2vec-openai': {\n", | |
"            'model': 'ada',\n", | |
"            'modelVersion': '002',\n", | |
"            'type': 'text',\n", | |
"            'vectorizeClassName': False,\n", | |
"        },\n", | |
"    },\n", | |
"    'properties': [content_property],\n", | |
"    'replicationConfig': {'factor': 1},\n", | |
"    'shardingConfig': sharding_config,\n", | |
"    'vectorIndexConfig': hnsw_config,\n", | |
"    'vectorIndexType': 'hnsw',\n", | |
"    'vectorizer': 'text2vec-openai',\n", | |
"}\n", | |
"\n", | |
"# Register the class with Weaviate.\n", | |
"client.schema.create_class(class_obj)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "aa40f993-e441-4f80-aafc-a298611f76e0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'cf991339-1c7f-4aea-b1ea-cf39e250b34d'" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Insert an object before the new property gets added to the class.\n", | |
"# The call is the last expression, so the value it returns is shown as the cell output.\n", | |
"first_label = {\"content\": \"Sample Content, only content\"}\n", | |
"client.data_object.create(data_object=first_label, class_name=\"Label\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "cf8d11bc-2a7b-4ee0-aa8a-f4fc8c20d5ed", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Definition of the extra 'type' property that gets added to the existing class.\n", | |
"add_prop = {\n", | |
"    'dataType': ['text'],\n", | |
"    'description': 'type or substype of the label, used for filtering',\n", | |
"    'indexFilterable': True,\n", | |
"    'indexSearchable': True,\n", | |
"    'moduleConfig': {\n", | |
"        # 'skip': True here, unlike the 'content' property of the class\n", | |
"        'text2vec-openai': {'skip': True, 'vectorizePropertyName': False},\n", | |
"    },\n", | |
"    'name': 'type',\n", | |
"    'tokenization': 'word',\n", | |
"}\n", | |
"\n", | |
"# Extend the 'Label' class with the property defined above.\n", | |
"client.schema.property.create('Label', add_prop)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "6fb6ff33-ee73-45d1-843f-f03003451f19", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Label': [{'content': 'Sample Content, only content',\n", | |
" 'type': None}]}}}" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Fetch 'content' and 'type' for every Label; at this point there is only one object.\n", | |
"client.query.get(\"Label\", [\"content\", \"type\"]).do()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "b24ae198-26d1-4fad-8a47-0f097612f850", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Label': []}}}" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Filter on Labels whose 'type' is null.\n", | |
"# The previously added object will not appear with this filter.\n", | |
"type_is_null = {\n", | |
"    \"path\": \"type\",\n", | |
"    \"operator\": \"IsNull\",\n", | |
"    \"valueBoolean\": True,\n", | |
"}\n", | |
"client.query.get(\"Label\", [\"content\", \"type\"]).with_where(type_is_null).do()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "b9ac5af7-624e-41ca-8e91-1036b160379b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'e5ba4f5f-9104-49f1-90d7-85592b6644c4'" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# But if we add a new object (again without a 'type' value)...\n", | |
"second_label = {\"content\": \"Sample Content, only content, no type, added later\"}\n", | |
"client.data_object.create(data_object=second_label, class_name=\"Label\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "9a7b7997-06bf-4903-a00d-6f64710143a9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Label': [{'content': 'Sample Content, only content, no type, added later',\n", | |
" 'type': None}]}}}" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# ...it will then appear with the IsNull filter.\n", | |
"type_is_null = {\n", | |
"    \"path\": \"type\",\n", | |
"    \"operator\": \"IsNull\",\n", | |
"    \"valueBoolean\": True,\n", | |
"}\n", | |
"client.query.get(\"Label\", [\"content\", \"type\"]).with_where(type_is_null).do()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"id": "b2576b12-2696-4b2e-9939-2aee01c23c2c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'data': {'Get': {'Label': [{'content': 'Sample Content, only content',\n", | |
" 'type': None},\n", | |
" {'content': 'Sample Content, only content, no type, added later',\n", | |
" 'type': None}]}}}" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Without any filter, the query should show both objects.\n", | |
"client.query.get(\"Label\", [\"content\", \"type\"]).do()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment