Created
May 17, 2023 12:35
-
-
Save hsm207/69b3c24b231375b74e8a5ab6f57ffe58 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using OpenAI" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"# set the openai key in the environment (placeholder only — never commit a real key; load it from the environment or a secrets manager)\n", | |
"os.environ[\"OPENAI_API_KEY\"] = \"sk-YOUROPENAIAPIKEY\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from langchain.embeddings.openai import OpenAIEmbeddings\n", | |
"from langchain.vectorstores import Chroma\n", | |
"from langchain.text_splitter import CharacterTextSplitter\n", | |
"from langchain.llms import OpenAI\n", | |
"from langchain.chains import RetrievalQA" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Using embedded DuckDB without persistence: data will be transient\n" | |
] | |
} | |
], | |
"source": [ | |
"from langchain.document_loaders import TextLoader\n", | |
"loader = TextLoader(\"/workspaces/privateGPT/state_of_the_union.txt\")\n", | |
"documents = loader.load()\n", | |
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", | |
"texts = text_splitter.split_documents(documents)\n", | |
"\n", | |
"embeddings = OpenAIEmbeddings()\n", | |
"docsearch = Chroma.from_documents(texts, embeddings)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", retriever=docsearch.as_retriever())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and is a former top litigator, federal public defender, and comes from a family of public school educators and police officers. He said that she is a consensus builder and has received broad support from the Fraternal Order of Police and former judges appointed by Democrats and Republicans.\"" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = \"What did the president say about Ketanji Brown Jackson\"\n", | |
"qa.run(query)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using GPT4All" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"llama_model_load: loading model from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin' - please wait ...\n", | |
"llama_model_load: n_vocab = 32000\n", | |
"llama_model_load: n_ctx = 1000\n", | |
"llama_model_load: n_embd = 5120\n", | |
"llama_model_load: n_mult = 256\n", | |
"llama_model_load: n_head = 40\n", | |
"llama_model_load: n_layer = 40\n", | |
"llama_model_load: n_rot = 128\n", | |
"llama_model_load: f16 = 2\n", | |
"llama_model_load: n_ff = 13824\n", | |
"llama_model_load: n_parts = 2\n", | |
"llama_model_load: type = 2\n", | |
"llama_model_load: ggml map size = 7759.83 MB\n", | |
"llama_model_load: ggml ctx size = 101.25 KB\n", | |
"llama_model_load: mem required = 9807.93 MB (+ 3216.00 MB per state)\n", | |
"llama_model_load: loading tensors from '/workspaces/models/ggml-gpt4all-l13b-snoozy.bin'\n", | |
"llama_model_load: model size = 7759.39 MB / num tensors = 363\n", | |
"llama_init_from_file: kv self size = 1562.50 MB\n" | |
] | |
} | |
], | |
"source": [ | |
"from langchain.llms import GPT4All\n", | |
"\n", | |
"# Instantiate the model. Callbacks support token-wise streaming\n", | |
"model = GPT4All(model=\"/workspaces/models/ggml-gpt4all-l13b-snoozy.bin\", n_ctx=1000, n_threads=8)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=docsearch.as_retriever())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31mCanceled future for execute_request message before replies were done" | |
] | |
}, | |
{ | |
"ename": "", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details." | |
] | |
} | |
], | |
"source": [ | |
"qa.run(query)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Using LlamaCpp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from langchain.llms import LlamaCpp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"llama.cpp: loading model from /workspaces/models/ggml-model-q4_0.bin\n", | |
"llama.cpp: can't use mmap because tensors are not aligned; convert to new format to avoid this\n", | |
"llama_model_load_internal: format = 'ggml' (old version with low tokenizer quality and no mmap support)\n", | |
"llama_model_load_internal: n_vocab = 32000\n", | |
"llama_model_load_internal: n_ctx = 2000\n", | |
"llama_model_load_internal: n_embd = 4096\n", | |
"llama_model_load_internal: n_mult = 256\n", | |
"llama_model_load_internal: n_head = 32\n", | |
"llama_model_load_internal: n_layer = 32\n", | |
"llama_model_load_internal: n_rot = 128\n", | |
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n", | |
"llama_model_load_internal: n_ff = 11008\n", | |
"llama_model_load_internal: n_parts = 1\n", | |
"llama_model_load_internal: model size = 7B\n", | |
"llama_model_load_internal: ggml ctx size = 4113748.20 KB\n", | |
"llama_model_load_internal: mem required = 5809.33 MB (+ 1026.00 MB per state)\n", | |
"...................................................................................................\n", | |
".\n", | |
"llama_init_from_file: kv self size = 1000.00 MB\n", | |
"AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | \n" | |
] | |
} | |
], | |
"source": [ | |
"model = LlamaCpp(model_path=\"/workspaces/models/ggml-model-q4_0.bin\", n_ctx=2000, n_threads=8)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"qa = RetrievalQA.from_chain_type(llm=model, chain_type=\"stuff\", retriever=docsearch.as_retriever())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"\" The President said that Justice Breyer's legacy of excellence should be continued by Judge Keitanji Brown Jackson, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He is a congressional builder who has received a broad range of support.\"" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"qa.run(query)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.3" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment