Skip to content

Instantly share code, notes, and snippets.

@roylez
Created September 4, 2023 06:53
Show Gist options
  • Save roylez/f780cc41e62f81ac6b0e65a10b4c892e to your computer and use it in GitHub Desktop.
Save roylez/f780cc41e62f81ac6b0e65a10b4c892e to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# https://python.langchain.com/docs/use_cases/question_answering/
# pip install chromadb langchain openai tiktoken
# pip install unstructured markdown # for markdown loader
# pip install sqlite-vss
import dotenv
# Load variables from a .env file before the langchain imports and client
# construction below run. Presumably this supplies OPENAI_API_KEY for the
# OpenAI-backed components -- confirm against your .env.
dotenv.load_dotenv()
# Basic integration: index a single web page and ask it one question.
from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator
# `loader` is reused later in this script to load the same page again.
loader = WebBaseLoader("https://juju.is/docs/dev/agent-introspection")
# Build an index from the loader using VectorstoreIndexCreator's defaults.
index = VectorstoreIndexCreator().from_loaders([loader])
# Print whatever the index answers for a sample question.
print(index.query("which version of juju defines `juju_leaves`"))
# load and save
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma as VSS
from langchain.vectorstores import SQLiteVSS as VSS
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
web_splits = text_splitter.split_documents(data)
## even more data, from markdown
from langchain.document_loaders import DirectoryLoader
md_loader = DirectoryLoader(".", glob="**/*.md")
md_splits = md_loader.load_and_split()
#print(md_splits)
vectorstore = VSS.from_documents(documents=web_splits + md_splits, embedding=OpenAIEmbeddings())
# retrieve, vector based
#question = "which version of juju defines juju_unit_status"
#docs = vectorstore.similarity_search(question)
# generate answers, LLMChain could be another choice which retains context
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
# Chat model handed to the QA chain; model name and temperature=0 are set explicitly.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# Retrieval QA chain: combines `llm` with a retriever obtained from `vectorstore`.
# query() below calls this object with {"query": ...} and reads the "result" key.
qa_chain = RetrievalQA.from_chain_type(llm,retriever=vectorstore.as_retriever())
def query(question, *, chain=None):
    """Ask a question, print the answer text, and return the full result.

    Args:
        question: The question; it is passed to the chain under the
            "query" key.
        chain: Optional callable that accepts ``{"query": question}`` and
            returns a mapping containing a "result" entry. When omitted,
            the module-level ``qa_chain`` is used, which keeps the original
            one-argument call working unchanged.

    Returns:
        The mapping returned by the chain, unmodified, so callers can read
        any other keys the chain provides.
    """
    # Resolve the default lazily so the global is only needed when no
    # explicit chain is supplied.
    runner = qa_chain if chain is None else chain
    res = runner({"query": question})
    print(res["result"])
    return res
# Sample run: query() prints the answer text for this question; the returned
# result is discarded here.
query("what is the name of roy's pet")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment