Skip to content

Instantly share code, notes, and snippets.

@imjacobclark
Created April 17, 2023 12:16
Show Gist options
  • Save imjacobclark/0e4ebb73e3fe85dd7a0f9219be248b8e to your computer and use it in GitHub Desktop.
Save imjacobclark/0e4ebb73e3fe85dd7a0f9219be248b8e to your computer and use it in GitHub Desktop.
SRE LangChain Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"id": "776ac1d3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: langchain in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (0.0.141)\n",
"Requirement already satisfied: pypdf in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (3.8.0)\n",
"Requirement already satisfied: openai in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (0.27.4)\n",
"Requirement already satisfied: tiktoken in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (0.3.3)\n",
"Requirement already satisfied: chromadb in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (0.3.21)\n",
"Requirement already satisfied: unstructured in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (0.5.12)\n",
"Requirement already satisfied: pdfminer in /Users/Jacob_Clark/anaconda3/lib/python3.10/site-packages (20191125)\n",
"\u001b[31mERROR: Could not find a version that satisfies the requirement detectron2 (from versions: none)\u001b[0m\u001b[31m\n",
"\u001b[0m\u001b[31mERROR: No matching distribution found for detectron2\u001b[0m\u001b[31m\n",
"\u001b[0m"
]
}
],
"source": [
"!pip3 install langchain pypdf openai tiktoken chromadb unstructured pdfminer detectron2"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "73e7612a",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"# Never hardcode credentials in a shared notebook: read the key from the\n",
"# environment (export OPENAI_API_KEY=...) instead. setdefault preserves an\n",
"# externally-set key rather than clobbering it with an empty string.\n",
"os.environ.setdefault(\"OPENAI_API_KEY\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "46970187",
"metadata": {},
"outputs": [],
"source": [
"from langchain import OpenAI, ConversationChain\n",
"from langchain.chains import RetrievalQA, SimpleSequentialChain\n",
"from langchain.document_loaders import PyPDFLoader, UnstructuredPDFLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import Chroma"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "620c2efe",
"metadata": {},
"outputs": [],
"source": [
"loader = PyPDFLoader(\"./sre.pdf\")\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "bc08b0b6",
"metadata": {},
"outputs": [],
"source": [
"pages = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "1498f4cd",
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "59cc6688",
"metadata": {},
"outputs": [],
"source": [
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "55687b16",
"metadata": {},
"outputs": [],
"source": [
"texts = text_splitter.split_documents(pages)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "cd51b290",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
}
],
"source": [
"docsearch = Chroma.from_documents(texts, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "181aa9b4",
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI(model_name='text-davinci-003', temperature=0.9, openai_api_key=os.environ[\"OPENAI_API_KEY\"])"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "2612fedb",
"metadata": {},
"outputs": [],
"source": [
"# VectorDBQA was never imported here and is deprecated (the cell's own output\n",
"# showed: \"`VectorDBQA` is deprecated - please use RetrievalQA\"). RetrievalQA\n",
"# is already imported above; it takes a retriever rather than a vectorstore.\n",
"qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=docsearch.as_retriever())"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "be203952",
"metadata": {},
"outputs": [],
"source": [
"contextual_text = \"\""
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "5ec74a43",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Availability, latency, performance, efficiency, change management, monitoring, emergency response, and capacity planning.'"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = 'What are the most important SRE metrics to measure?'\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "96811991",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' This approach is not sustainable in the long-term as it is time-consuming and prone to human error. Automation is a better approach as it is more accurate and efficient.'"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = \"'My current team spends a lot of time manually looking at log files and rebooting servers. What is wrong with this approach?'\"\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "f51e3ee0",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"\" You can start by making a compelling case of how this strategy will help SREs, such as consistent and supported software solutions that speed ramp-up for new SREs and reducing the number of ways to perform the same task. Evaluate your organization's capabilities and fill any gaps by taking advantage of the skills already present in your company. Targeted project work, not menial work, is important for SREs. Start with the basics, such as writing a service level objective (SLO) and getting help clearing kindling. Finally, encourage a hunger for failure by reading and sharing postmortems.\""
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = 'Give me a strategy on how I can achieve this?'\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "88a9b169",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" No, it would not be a good idea to capture uptime of your APIs and use these to determine how much a bonus particular teams might get. Uptime is not a good metric to use for this purpose because it does not take into account the quality of the service or the user experience. It is better to use metrics such as request success rate or aggregate availability to measure the performance of a service.\n"
]
}
],
"source": [
"prompt = \"Would it be a good idea to capture uptime of my APIs and use these to determine how much a bonus particular teams might get depending on how good this uptime is?\"\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "64f1e2c9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' You could measure the quality of a service by looking at metrics such as service availability, unplanned downtime, cost, failure modes, and other service metrics.'"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = \"Give me some ideas on how I could measure the quality of a service?\"\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "73355e38",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' No, these metrics would not be a good way to determine bonus payments. Uptime, request success rate, and aggregate availability are better metrics to use for this purpose.'"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = \"Would these metrics in combination be a good way to determine bonus payments?\"\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "48d1aadf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Yes, uptime, request success rate, and aggregate availability are good metrics to use for determining bonus payments.'"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = \"So uptime, request success rate and aggregate availability a good way to determine bonus payments?\"\n",
"text = qa_chain.run(prompt)\n",
"contextual_text += text\n",
"contextual_text += prompt\n",
"docsearch.add_texts([text])\n",
"text"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24278713",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment