-
-
Save IvanCampos/715908e57fc1c1a6acd374e31f8c8aa9 to your computer and use it in GitHub Desktop.
vector-chroma-openai.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyMx7I7HhfnBQQ8Lzo0e/J5s", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/IvanCampos/715908e57fc1c1a6acd374e31f8c8aa9/vector-chroma-openai.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!pip install chromadb openai\n", | |
"\n", | |
"import os\n", | |
"import chromadb\n", | |
"import openai\n", | |
"\n", | |
"OPENAI_API_KEY = \"YOUR_OPENAI_API_KEY\"\n", | |
"\n", | |
"client = chromadb.Client()\n", | |
"openai.api_key = OPENAI_API_KEY" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "DSFAZXvsQow1", | |
"outputId": "faaabfd3-2b4a-43b9-dfd7-6f41f56348cf" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: chromadb in /usr/local/lib/python3.9/dist-packages (0.3.21)\n", | |
"Requirement already satisfied: openai in /usr/local/lib/python3.9/dist-packages (0.27.4)\n", | |
"Requirement already satisfied: clickhouse-connect>=0.5.7 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.5.20)\n", | |
"Requirement already satisfied: sentence-transformers>=2.2.2 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.2.2)\n", | |
"Requirement already satisfied: numpy>=1.21.6 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.22.4)\n", | |
"Requirement already satisfied: requests>=2.28 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.28.2)\n", | |
"Requirement already satisfied: fastapi>=0.85.1 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.95.0)\n", | |
"Requirement already satisfied: uvicorn[standard]>=0.18.3 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.21.1)\n", | |
"Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.10.7)\n", | |
"Requirement already satisfied: pandas>=1.3 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.5.3)\n", | |
"Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.5.0)\n", | |
"Requirement already satisfied: hnswlib>=0.7 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.7.0)\n", | |
"Requirement already satisfied: duckdb>=0.7.1 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.7.1)\n", | |
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from openai) (3.8.4)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai) (4.65.0)\n", | |
"Requirement already satisfied: pytz in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (2022.7.1)\n", | |
"Requirement already satisfied: zstandard in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (0.20.0)\n", | |
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (2022.12.7)\n", | |
"Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (1.26.15)\n", | |
"Requirement already satisfied: lz4 in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (4.3.2)\n", | |
"Requirement already satisfied: starlette<0.27.0,>=0.26.1 in /usr/local/lib/python3.9/dist-packages (from fastapi>=0.85.1->chromadb) (0.26.1)\n", | |
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=1.3->chromadb) (2.8.2)\n", | |
"Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (1.6)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", | |
"Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n", | |
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic>=1.9->chromadb) (4.5.0)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.28->chromadb) (3.4)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests>=2.28->chromadb) (2.0.12)\n", | |
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.1.98)\n", | |
"Requirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.13.4)\n", | |
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (1.2.2)\n", | |
"Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (1.10.1)\n", | |
"Requirement already satisfied: nltk in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (3.8.1)\n", | |
"Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.15.1+cu118)\n", | |
"Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (2.0.0+cu118)\n", | |
"Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (4.27.4)\n", | |
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.3)\n", | |
"Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n", | |
"Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n", | |
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0)\n", | |
"Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n", | |
"Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n", | |
"Requirement already satisfied: httptools>=0.5.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.5.0)\n", | |
"Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.1)\n", | |
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (4.0.2)\n", | |
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (6.0.4)\n", | |
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.8.2)\n", | |
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (22.2.0)\n", | |
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.3.3)\n", | |
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.3.1)\n", | |
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers>=2.2.2->chromadb) (23.0)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers>=2.2.2->chromadb) (3.11.0)\n", | |
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi>=0.85.1->chromadb) (3.6.2)\n", | |
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (2.0.0)\n", | |
"Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.1)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.1.2)\n", | |
"Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (1.11.1)\n", | |
"Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.25.2)\n", | |
"Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (16.0.1)\n", | |
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers>=2.2.2->chromadb) (0.13.3)\n", | |
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers>=2.2.2->chromadb) (2022.10.31)\n", | |
"Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from nltk->sentence-transformers>=2.2.2->chromadb) (1.2.0)\n", | |
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from scikit-learn->sentence-transformers>=2.2.2->chromadb) (3.1.0)\n", | |
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->sentence-transformers>=2.2.2->chromadb) (8.4.0)\n", | |
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi>=0.85.1->chromadb) (1.3.0)\n", | |
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (2.1.2)\n", | |
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (1.3.0)\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"WARNING:chromadb:Using embedded DuckDB without persistence: data will be transient\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def complete(prompt):\n", | |
" res = openai.ChatCompletion.create(\n", | |
" model=\"gpt-3.5-turbo\",\n", | |
" messages=[{\"role\": \"user\", \"content\": prompt}]\n", | |
" )\n", | |
" return res['choices'][0]['message']['content'].strip()\n", | |
"\n", | |
"def get_ada_embedding(text):\n", | |
" text = text.replace(\"\\n\", \" \")\n", | |
" return openai.Embedding.create(input=text, model=\"text-embedding-ada-002\")[\"data\"][0][\"embedding\"]\n" | |
], | |
"metadata": { | |
"id": "ckyzgAsuc4Vr" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"CHROMA_TABLE_NAME = \"chroma-openai\"\n", | |
"\n", | |
"from chromadb.utils import embedding_functions\n", | |
"openai_ef = embedding_functions.OpenAIEmbeddingFunction(\n", | |
" api_key=OPENAI_API_KEY,\n", | |
" model_name=\"text-embedding-ada-002\"\n", | |
" )\n", | |
"collection = client.create_collection(CHROMA_TABLE_NAME, embedding_function=openai_ef)\n", | |
"\n", | |
"texts = [\n", | |
" \"AI Agents as virtual employees are the future\",\n", | |
" \"Vector Databases are the future\",\n", | |
" \"AGI is not here....yet.\"\n", | |
" ]\n", | |
"\n", | |
"for loopIndex, text in enumerate(texts, start=1):\n", | |
" collection.add(embeddings=get_ada_embedding(text), metadatas=[{\"text\": text}], ids=[\"test-openai-\"+str(loopIndex)]) \n" | |
], | |
"metadata": { | |
"id": "KNO61o4Hbflx" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"query_text = \"are ai agents the future?\"\n", | |
"\n", | |
"results = collection.query(\n", | |
" query_embeddings=get_ada_embedding(query_text), \n", | |
" n_results=3\n", | |
" )\n", | |
"\n", | |
"import json\n", | |
"print(json.dumps(results, indent=2))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "MxSH-YaQfw7L", | |
"outputId": "7165e7a1-86e4-4ad3-8a0c-4244570a520b" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"{\n", | |
" \"ids\": [\n", | |
" [\n", | |
" \"test-openai-1\",\n", | |
" \"test-openai-3\",\n", | |
" \"test-openai-2\"\n", | |
" ]\n", | |
" ],\n", | |
" \"embeddings\": null,\n", | |
" \"documents\": [\n", | |
" [\n", | |
" null,\n", | |
" null,\n", | |
" null\n", | |
" ]\n", | |
" ],\n", | |
" \"metadatas\": [\n", | |
" [\n", | |
" {\n", | |
" \"text\": \"AI Agents as virtual employees are the future\"\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"AGI is not here....yet.\"\n", | |
" },\n", | |
" {\n", | |
" \"text\": \"Vector Databases are the future\"\n", | |
" }\n", | |
" ]\n", | |
" ],\n", | |
" \"distances\": [\n", | |
" [\n", | |
" 0.2265148013830185,\n", | |
" 0.40490707755088806,\n", | |
" 0.46962735056877136\n", | |
" ]\n", | |
" ]\n", | |
"}\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"client.delete_collection(name=\"chroma-openai\")" | |
], | |
"metadata": { | |
"id": "GJe9ELXnrITV" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment