Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created May 17, 2023 12:35
Show Gist options
  • Save hsm207/69b3c24b231375b74e8a5ab6f57ffe58 to your computer and use it in GitHub Desktop.
Save hsm207/69b3c24b231375b74e8a5ab6f57ffe58 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"# Never hardcode API keys in a notebook -- saved outputs and shared gists leak them.\n",
"# Prompt for the key only when the environment does not already provide it.\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
"    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.llms import OpenAI\n",
"from langchain.chains import RetrievalQA"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
}
],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"# Source document -- adjust this absolute path for your environment\n",
"DOC_PATH = \"/workspaces/privateGPT/state_of_the_union.txt\"\n",
"\n",
"loader = TextLoader(DOC_PATH)\n",
"documents = loader.load()\n",
"\n",
"# Split into ~1000-character chunks with no overlap\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n",
"\n",
"# Embed the chunks and index them in an in-memory Chroma vector store\n",
"embeddings = OpenAIEmbeddings()\n",
"docsearch = Chroma.from_documents(texts, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Build a RetrievalQA chain: retrieved chunks are \"stuffed\" into one prompt for the LLM\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and is a former top litigator, federal public defender, and comes from a family of public school educators and police officers. He said that she is a consensus builder and has received broad support from the Fraternal Order of Police and former judges appointed by Democrats and Republicans.\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The question reused throughout this notebook; the answer displays as the cell output\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"answer = qa.run(query)\n",
"answer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using GPT4All"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 1000\n",
"llama_model_load: n_embd = 5120\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 40\n",
"llama_model_load: n_layer = 40\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 13824\n",
"llama_model_load: n_parts = 2\n",
"llama_model_load: type = 2\n",
"llama_model_load: ggml map size = 7759.83 MB\n",
"llama_model_load: ggml ctx size = 101.25 KB\n",
"llama_model_load: mem required = 9807.93 MB (+ 3216.00 MB per state)\n",
"llama_model_load: loading tensors from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin'\n",
"llama_model_load: model size = 7759.39 MB / num tensors = 363\n",
"llama_init_from_file: kv self size = 1562.50 MB\n"
]
}
],
"source": [
"from langchain.llms import GPT4All\n",
"\n",
"# Local GPT4All model; callbacks support token-wise streaming.\n",
"# Adjust this absolute path for your environment.\n",
"GPT4ALL_MODEL_PATH = \"/workspaces/models/ggml-gpt4all-l13b-snoozy.bin\"\n",
"model = GPT4All(model=GPT4ALL_MODEL_PATH, n_ctx=1000, n_threads=8)\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Same chain construction as before, now backed by the local GPT4All model\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mCanceled future for execute_request message before replies were done"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"# NOTE(review): this cell crashed the kernel on the recorded run (see output).\n",
"# Presumably a memory/context issue with the 13B model -- confirm before relying on it.\n",
"qa.run(query)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using LlamaCpp"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import LlamaCpp"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: loading model from /workspaces/models/ggml-model-q4_0.bin\n",
"llama.cpp: can't use mmap because tensors are not aligned; convert to new format to avoid this\n",
"llama_model_load_internal: format = 'ggml' (old version with low tokenizer quality and no mmap support)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 2000\n",
"llama_model_load_internal: n_embd = 4096\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 32\n",
"llama_model_load_internal: n_layer = 32\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 11008\n",
"llama_model_load_internal: n_parts = 1\n",
"llama_model_load_internal: model size = 7B\n",
"llama_model_load_internal: ggml ctx size = 4113748.20 KB\n",
"llama_model_load_internal: mem required = 5809.33 MB (+ 1026.00 MB per state)\n",
"...................................................................................................\n",
".\n",
"llama_init_from_file: kv self size = 1000.00 MB\n",
"AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n"
]
}
],
"source": [
"# Local llama.cpp model; adjust this absolute path for your environment\n",
"LLAMA_MODEL_PATH = \"/workspaces/models/ggml-model-q4_0.bin\"\n",
"model = LlamaCpp(model_path=LLAMA_MODEL_PATH, n_ctx=2000, n_threads=8)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Same chain construction as before, now backed by the LlamaCpp model\n",
"retriever = docsearch.as_retriever()\n",
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=retriever)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The President said that Justice Breyer's legacy of excellence should be continued by Judge Keitanji Brown Jackson, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He is a congressional builder who has received a broad range of support.\""
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-ask the same question against the LlamaCpp-backed chain\n",
"qa.run(query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment