Skip to content

Instantly share code, notes, and snippets.

@spencerkittleson
Created June 26, 2023 16:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spencerkittleson/8dc2eab0c8246cd39e835c4fc5353078 to your computer and use it in GitHub Desktop.
Save spencerkittleson/8dc2eab0c8246cd39e835c4fc5353078 to your computer and use it in GitHub Desktop.
GPT4All question-answering over local documents (LangChain RetrievalQA with a Chroma vector index and HuggingFace sentence-transformer embeddings)
from langchain.llms import GPT4All
from langchain.vectorstores import Chroma
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredMarkdownLoader
from langchain.chains import RetrievalQA
import sys
import os
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
# Derived from: https://gist.github.com/scriptsandthings/75c38c54e05dd20d65fd83a9bd522406
# Earlier attempt used the GPT4All model itself for embeddings via LlamaCppEmbeddings:
# GPT4ALL_MODEL_PATH = r'C:\Users\spenc\.models\GPT4All-13B-snoozy.ggmlv3.q4_0.bin' # works
# embeddings = LlamaCppEmbeddings(model_path=GPT4ALL_MODEL_PATH)
# Local path to the quantized GPT4All model used as the chat LLM below.
GPT4ALL_MODEL_PATH = r'C:\Users\spenc\.models\GPT4All-13B-snoozy.ggmlv3.q4_0.bin' # works
# Separate, lightweight sentence-transformer model used only for embeddings.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
def main() -> None:
    """Answer a hard-coded query over local documents with GPT4All + Chroma.

    Reuses a persisted Chroma index from ./persist when one exists; otherwise
    builds an index from data.txt (persisting it when PERSIST is True), then
    runs a RetrievalQA chain with the local GPT4All model and prints the
    answer.
    """
    print('GPT Deki')
    # Enable to save to disk & reuse the model (for repeated queries on the same data)
    PERSIST = True
    query = "What is the name of my dog?"
    if PERSIST and os.path.exists("persist"):
        print("Reusing index...\n")
        vectorstore = Chroma(persist_directory="persist", embedding_function=embeddings)
        index = VectorStoreIndexWrapper(vectorstore=vectorstore)
    else:
        loader = TextLoader("data.txt")  # Use this line if you only need data.txt
        # loader = DirectoryLoader("data/")
        if PERSIST:
            # Persist the freshly built index so subsequent runs hit the branch above.
            index = VectorstoreIndexCreator(
                embedding=embeddings,
                vectorstore_kwargs={"persist_directory": "persist"},
            ).from_loaders([loader])
        else:
            index = VectorstoreIndexCreator(embedding=embeddings).from_loaders([loader])
    model = GPT4All(model=GPT4ALL_MODEL_PATH)
    # k=1: feed only the single most relevant chunk into the LLM context.
    chain = RetrievalQA.from_chain_type(
        llm=model,
        retriever=index.vectorstore.as_retriever(search_kwargs={"k": 1}),
    )
    print(chain.run(query))


if __name__ == "__main__":
    # The original gist defined main() but never invoked it; guard added so
    # the script actually runs when executed directly.
    main()
langchain
chromadb
tiktoken
gpt4all
llama-cpp-python
sentence-transformers
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment