Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created May 17, 2023 12:35
Show Gist options
  • Save hsm207/69b3c24b231375b74e8a5ab6f57ffe58 to your computer and use it in GitHub Desktop.
Save hsm207/69b3c24b231375b74e8a5ab6f57ffe58 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"# Never hardcode API keys in a notebook -- saved outputs and shared gists leak them.\n",
"# Prompt for the key only when the environment does not already provide it.\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
"    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.llms import OpenAI\n",
"from langchain.chains import RetrievalQA"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
}
],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"# Source document -- adjust this absolute path for your environment\n",
"DOC_PATH = \"/workspaces/privateGPT/state_of_the_union.txt\"\n",
"\n",
"loader = TextLoader(DOC_PATH)\n",
"documents = loader.load()\n",
"\n",
"# Split into ~1000-character chunks with no overlap\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"\n",
"# Embed the chunks and index them in an in-memory Chroma vector store\n",
"embeddings = OpenAIEmbeddings()\n",
"docsearch = Chroma.from_documents(texts, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Build a RetrievalQA chain: retrieved chunks are \"stuffed\" into one prompt for the LLM\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and is a former top litigator, federal public defender, and comes from a family of public school educators and police officers. He said that she is a consensus builder and has received broad support from the Fraternal Order of Police and former judges appointed by Democrats and Republicans.\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The question reused throughout this notebook; the answer displays as the cell output\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"answer = qa.run(query)\n",
"answer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using GPT4All"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 1000\n",
"llama_model_load: n_embd = 5120\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 40\n",
"llama_model_load: n_layer = 40\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 13824\n",
"llama_model_load: n_parts = 2\n",
"llama_model_load: type = 2\n",
"llama_model_load: ggml map size = 7759.83 MB\n",
"llama_model_load: ggml ctx size = 101.25 KB\n",
"llama_model_load: mem required = 9807.93 MB (+ 3216.00 MB per state)\n",
"llama_model_load: loading tensors from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin'\n",
"llama_model_load: model size = 7759.39 MB / num tensors = 363\n",
"llama_init_from_file: kv self size = 1562.50 MB\n"
]
}
],
"source": [
"from langchain.llms import GPT4All\n",
"\n",
"# Local GPT4All model; callbacks support token-wise streaming.\n",
"# Adjust this absolute path for your environment.\n",
"GPT4ALL_MODEL_PATH = \"/workspaces/models/ggml-gpt4all-l13b-snoozy.bin\"\n",
"model = GPT4All(model=GPT4ALL_MODEL_PATH, n_ctx=1000, n_threads=8)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Same chain construction as before, now backed by the local GPT4All model\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mCanceled future for execute_request message before replies were done"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"# NOTE(review): this cell crashed the kernel on the recorded run (see output).\n",
"# Presumably a memory/context issue with the 13B model -- confirm before relying on it.\n",
"qa.run(query)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using LlamaCpp"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import LlamaCpp"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: loading model from /workspaces/models/ggml-model-q4_0.bin\n",
"llama.cpp: can't use mmap because tensors are not aligned; convert to new format to avoid this\n",
"llama_model_load_internal: format = 'ggml' (old version with low tokenizer quality and no mmap support)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 2000\n",
"llama_model_load_internal: n_embd = 4096\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 32\n",
"llama_model_load_internal: n_layer = 32\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 11008\n",
"llama_model_load_internal: n_parts = 1\n",
"llama_model_load_internal: model size = 7B\n",
"llama_model_load_internal: ggml ctx size = 4113748.20 KB\n",
"llama_model_load_internal: mem required = 5809.33 MB (+ 1026.00 MB per state)\n",
"...................................................................................................\n",
".\n",
"llama_init_from_file: kv self size = 1000.00 MB\n",
"AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n"
]
}
],
"source": [
"# Local llama.cpp model; adjust this absolute path for your environment\n",
"LLAMA_MODEL_PATH = \"/workspaces/models/ggml-model-q4_0.bin\"\n",
"model = LlamaCpp(model_path=LLAMA_MODEL_PATH, n_ctx=2000, n_threads=8)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Same chain construction as before, now backed by the LlamaCpp model\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The President said that Justice Breyer's legacy of excellence should be continued by Judge Keitanji Brown Jackson, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He is a congressional builder who has received a broad range of support.\""
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-ask the same question against the LlamaCpp-backed chain\n",
"qa.run(query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment