Skip to content

Instantly share code, notes, and snippets.

@IvanCampos
Created April 13, 2023 00:49
Show Gist options
  • Save IvanCampos/715908e57fc1c1a6acd374e31f8c8aa9 to your computer and use it in GitHub Desktop.
Save IvanCampos/715908e57fc1c1a6acd374e31f8c8aa9 to your computer and use it in GitHub Desktop.
vector-chroma-openai.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMx7I7HhfnBQQ8Lzo0e/J5s",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/IvanCampos/715908e57fc1c1a6acd374e31f8c8aa9/vector-chroma-openai.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"!pip install chromadb openai\n",
"\n",
"import os\n",
"import chromadb\n",
"import openai\n",
"\n",
"OPENAI_API_KEY = \"YOUR_OPENAI_API_KEY\"\n",
"\n",
"client = chromadb.Client()\n",
"openai.api_key = OPENAI_API_KEY"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DSFAZXvsQow1",
"outputId": "faaabfd3-2b4a-43b9-dfd7-6f41f56348cf"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: chromadb in /usr/local/lib/python3.9/dist-packages (0.3.21)\n",
"Requirement already satisfied: openai in /usr/local/lib/python3.9/dist-packages (0.27.4)\n",
"Requirement already satisfied: clickhouse-connect>=0.5.7 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.5.20)\n",
"Requirement already satisfied: sentence-transformers>=2.2.2 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.2.2)\n",
"Requirement already satisfied: numpy>=1.21.6 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.22.4)\n",
"Requirement already satisfied: requests>=2.28 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.28.2)\n",
"Requirement already satisfied: fastapi>=0.85.1 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.95.0)\n",
"Requirement already satisfied: uvicorn[standard]>=0.18.3 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.21.1)\n",
"Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.10.7)\n",
"Requirement already satisfied: pandas>=1.3 in /usr/local/lib/python3.9/dist-packages (from chromadb) (1.5.3)\n",
"Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.9/dist-packages (from chromadb) (2.5.0)\n",
"Requirement already satisfied: hnswlib>=0.7 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.7.0)\n",
"Requirement already satisfied: duckdb>=0.7.1 in /usr/local/lib/python3.9/dist-packages (from chromadb) (0.7.1)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from openai) (3.8.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from openai) (4.65.0)\n",
"Requirement already satisfied: pytz in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (2022.7.1)\n",
"Requirement already satisfied: zstandard in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (0.20.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (2022.12.7)\n",
"Requirement already satisfied: urllib3>=1.26 in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (1.26.15)\n",
"Requirement already satisfied: lz4 in /usr/local/lib/python3.9/dist-packages (from clickhouse-connect>=0.5.7->chromadb) (4.3.2)\n",
"Requirement already satisfied: starlette<0.27.0,>=0.26.1 in /usr/local/lib/python3.9/dist-packages (from fastapi>=0.85.1->chromadb) (0.26.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=1.3->chromadb) (2.8.2)\n",
"Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (1.6)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n",
"Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.9/dist-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic>=1.9->chromadb) (4.5.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests>=2.28->chromadb) (3.4)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.9/dist-packages (from requests>=2.28->chromadb) (2.0.12)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.1.98)\n",
"Requirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.13.4)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (1.2.2)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (1.10.1)\n",
"Requirement already satisfied: nltk in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (3.8.1)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (0.15.1+cu118)\n",
"Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (2.0.0+cu118)\n",
"Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /usr/local/lib/python3.9/dist-packages (from sentence-transformers>=2.2.2->chromadb) (4.27.4)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (8.1.3)\n",
"Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n",
"Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.17.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (6.0)\n",
"Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.0)\n",
"Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n",
"Requirement already satisfied: httptools>=0.5.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.5.0)\n",
"Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.9/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (11.0.1)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (4.0.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (6.0.4)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.8.2)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (22.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->openai) (1.3.1)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers>=2.2.2->chromadb) (23.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from huggingface-hub>=0.4.0->sentence-transformers>=2.2.2->chromadb) (3.11.0)\n",
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi>=0.85.1->chromadb) (3.6.2)\n",
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (2.0.0)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.1)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.1.2)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (1.11.1)\n",
"Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (3.25.2)\n",
"Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (16.0.1)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers>=2.2.2->chromadb) (0.13.3)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers>=2.2.2->chromadb) (2022.10.31)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from nltk->sentence-transformers>=2.2.2->chromadb) (1.2.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from scikit-learn->sentence-transformers>=2.2.2->chromadb) (3.1.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->sentence-transformers>=2.2.2->chromadb) (8.4.0)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi>=0.85.1->chromadb) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (2.1.2)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch>=1.6.0->sentence-transformers>=2.2.2->chromadb) (1.3.0)\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:chromadb:Using embedded DuckDB without persistence: data will be transient\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def complete(prompt):\n",
" res = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
" )\n",
" return res['choices'][0]['message']['content'].strip()\n",
"\n",
"def get_ada_embedding(text):\n",
" text = text.replace(\"\\n\", \" \")\n",
" return openai.Embedding.create(input=text, model=\"text-embedding-ada-002\")[\"data\"][0][\"embedding\"]\n"
],
"metadata": {
"id": "ckyzgAsuc4Vr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"CHROMA_TABLE_NAME = \"chroma-openai\"\n",
"\n",
"from chromadb.utils import embedding_functions\n",
"openai_ef = embedding_functions.OpenAIEmbeddingFunction(\n",
" api_key=OPENAI_API_KEY,\n",
" model_name=\"text-embedding-ada-002\"\n",
" )\n",
"collection = client.create_collection(CHROMA_TABLE_NAME, embedding_function=openai_ef)\n",
"\n",
"texts = [\n",
" \"AI Agents as virtual employees are the future\",\n",
" \"Vector Databases are the future\",\n",
" \"AGI is not here....yet.\"\n",
" ]\n",
"\n",
"for loopIndex, text in enumerate(texts, start=1):\n",
" collection.add(embeddings=get_ada_embedding(text), metadatas=[{\"text\": text}], ids=[\"test-openai-\"+str(loopIndex)]) \n"
],
"metadata": {
"id": "KNO61o4Hbflx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"query_text = \"are ai agents the future?\"\n",
"\n",
"results = collection.query(\n",
" query_embeddings=get_ada_embedding(query_text), \n",
" n_results=3\n",
" )\n",
"\n",
"import json\n",
"print(json.dumps(results, indent=2))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MxSH-YaQfw7L",
"outputId": "7165e7a1-86e4-4ad3-8a0c-4244570a520b"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"ids\": [\n",
" [\n",
" \"test-openai-1\",\n",
" \"test-openai-3\",\n",
" \"test-openai-2\"\n",
" ]\n",
" ],\n",
" \"embeddings\": null,\n",
" \"documents\": [\n",
" [\n",
" null,\n",
" null,\n",
" null\n",
" ]\n",
" ],\n",
" \"metadatas\": [\n",
" [\n",
" {\n",
" \"text\": \"AI Agents as virtual employees are the future\"\n",
" },\n",
" {\n",
" \"text\": \"AGI is not here....yet.\"\n",
" },\n",
" {\n",
" \"text\": \"Vector Databases are the future\"\n",
" }\n",
" ]\n",
" ],\n",
" \"distances\": [\n",
" [\n",
" 0.2265148013830185,\n",
" 0.40490707755088806,\n",
" 0.46962735056877136\n",
" ]\n",
" ]\n",
"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"client.delete_collection(name=\"chroma-openai\")"
],
"metadata": {
"id": "GJe9ELXnrITV"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment