ChatGPT4All
wget https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin
pip install pyllama
mkdir llama
python -m llama.download --model_size 7B --folder llama/
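# Note: the 7B download above is only needed for llama/tokenizer.model, which
# the conversion step below consumes. A quick existence check (sketch):
# python -c "import os; assert os.path.exists('llama/tokenizer.model')"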
# https://github.com/nomic-ai/pygpt4all (formerly pyllamacpp)
# pip install pyllamacpp fails, so build and install it from source instead
git clone --recursive https://github.com/nomic-ai/pygpt4all && cd pygpt4all
pip install .
pyllamacpp-convert-gpt4all gpt4all-lora-quantized.bin llama/tokenizer.model gpt4all-lora-q-converted.bin
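# Optional smoke test that the converted model actually loads. This assumes
# the pyllamacpp Model API from the nomic-ai fork current at the time; verify
# the signature against your installed version:
python - <<'EOF'
from pyllamacpp.model import Model
model = Model(ggml_model='gpt4all-lora-q-converted.bin', n_ctx=512)
model.generate("Once upon a time, ", n_predict=32, new_text_callback=lambda t: print(t, end=""))
EOF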
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.embeddings import LlamaCppEmbeddings
print("running")
GPT4ALL_MODEL_PATH="/root/gpt4all-lora-q-converted.bin"
print("loading llm")
llm = LlamaCpp(model_path=GPT4ALL_MODEL_PATH,max_tokens=128)
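# Optional one-off prompt to confirm the model generates before wiring up the
# chain (LangChain LLM objects of this era are directly callable; a sketch):
# print(llm("Q: What is the capital of France? A:"))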
print("creating fragment")
def search_context(src, phrase, buffer=100):
    # Return roughly `buffer` words of context on either side of the first
    # occurrence of `phrase`. Raises ValueError if `phrase` does not appear
    # as a standalone whitespace-delimited word in the file.
    with open(src, 'r') as f:
        txt = f.read()
    words = txt.split()
    index = words.index(phrase)
    start_index = max(0, index - buffer)
    end_index = min(len(words), index + buffer + 1)
    return ' '.join(words[start_index:end_index])

fragment = './fragment.txt'
with open(fragment, 'w') as fo:
    _txt = search_context('./state_of_the_union.txt', "Ketanji")
    fo.write(_txt)
print("loading fragment")
loader = TextLoader('./fragment.txt')
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
print("llama embeddings")
llama_embeddings = LlamaCppEmbeddings(model_path=GPT4ALL_MODEL_PATH, n_batch=512)
persist_directory = 'db_2'
docsearch = Chroma.from_documents(documents=texts, embedding=llama_embeddings, persist_directory=persist_directory)
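# Optional: persist the index so later runs can reload it instead of
# re-embedding (Chroma wrapper API as of this LangChain version; verify
# against your installed release):
# docsearch.persist()
# docsearch = Chroma(persist_directory=persist_directory, embedding_function=llama_embeddings)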
MIN_DOCS = 1
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever(search_kwargs={"k": MIN_DOCS}))
query = "What did the president say about Ketanji Brown Jackson"
print(query)
output = qa.run(query)
print(output)
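# Optional: to see which chunk produced the answer, rebuild the chain with
# return_source_documents=True and call it with a dict (same-era LangChain
# API; treat as a sketch to verify):
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
#                                  retriever=docsearch.as_retriever(search_kwargs={"k": MIN_DOCS}),
#                                  return_source_documents=True)
# result = qa({"query": query})
# print(result["result"], result["source_documents"])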
"""
index = VectorstoreIndexCreator(embedding=llama_embeddings,
vectorstore_kwargs={"persist_directory": "db"}
).from_loaders([loader])
query = "What did the president say about Ketanji Brown Jackson"
index.query(query, llm=llm)
"""
template = """
Question: {question}
Answer:
"""
"""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm = LlamaCpp(model_path=GPT4ALL_MODEL_PATH)
llm_chain = LLMChain(prompt=prompt, llm=llm)
question = "What did the president say about Ketanji Brown Jackson"
resp = llm_chain.run(question)
print(resp)
"""
# pip install unstructured
# wget https://s21.q4cdn.com/399680738/files/doc_financials/2022/q4/Meta-12.31.2022-Exhibit-99.1-FINAL.pdf
# mkdir docs
# mv *.pdf docs/
# pip install pdf2image
# pip install pytesseract  # pytesseract is only a wrapper; the tesseract binary itself is also needed (e.g. apt-get install tesseract-ocr)
# apt-get install poppler-utils
# pip install 'git+https://github.com/facebookresearch/detectron2.git'
# pip install Pillow==9.0.0
import os
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.embeddings import LlamaCppEmbeddings
from langchain.document_loaders import UnstructuredPDFLoader
from detectron2.config import get_cfg
cfg = get_cfg()
cfg.MODEL.DEVICE = 'cuda'  # detectron2 expects 'cuda' or 'cpu'; 'gpu' is not a valid torch device
print("running")
GPT4ALL_MODEL_PATH = "/root/gpt4all-lora-q-converted.bin"
print("loading llm")
llm = LlamaCpp(model_path=GPT4ALL_MODEL_PATH, max_tokens=128)
print("loading fragment")
#loader = TextLoader('./fragment.txt')
text_folder = 'docs'
#loaders = [UnstructuredPDFLoader(os.path.join(text_folder, fn)) for fn in os.listdir(text_folder)]
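# (If using the multi-file variant above, flatten each loader's output into
#  one list, e.g.: documents = [doc for ldr in loaders for doc in ldr.load()])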
loader = UnstructuredPDFLoader("docs/Meta-12.31.2022-Exhibit-99.1-FINAL.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
print("llama embeddings")
llama_embeddings = LlamaCppEmbeddings(model_path=GPT4ALL_MODEL_PATH, n_batch=512)
persist_directory = 'db_pdf'  # use a fresh directory; reusing 'db_2' from the earlier script would mix the two indexes
docsearch = Chroma.from_documents(documents=texts, embedding=llama_embeddings, persist_directory=persist_directory)
MIN_DOCS = 1
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever(search_kwargs={"k": MIN_DOCS}))
query = "How much revenue did Meta make in 2022?"
print(query)
output = qa.run(query)
print(output)