# pip install chromadb langchain openai tiktoken
# pip install unstructured markdown # for markdown loader
# pip install sqlite-vss
import dotenv
# basic integration
from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator
# Populate os.environ from a local .env file (if one exists) before any
# OpenAI-backed object is constructed, so credentials kept there are visible.
# Variables that are already set in the environment are left untouched.
dotenv.load_dotenv()
# NOTE(review): the URL is an empty string here -- presumably redacted; put the
# address of the page to index before running.
loader = WebBaseLoader("")
# Build a vector index over the loader's documents with the creator's defaults.
index = VectorstoreIndexCreator().from_loaders([loader])
print(index.query("which version of juju defines `juju_leaves`"))
# load the documents, split them into chunks, and save them in a vector store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
# from langchain.vectorstores import Chroma as VSS
from langchain.vectorstores import SQLiteVSS as VSS
# Splitter configured for chunk_size=500 with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
# Load the web loader's documents (the same loader was already handed to the
# index creator above) and cut them into chunks.
data = loader.load()
web_splits = text_splitter.split_documents(data)
# additional data: Markdown files found under the current directory
from langchain.document_loaders import DirectoryLoader
# Pick up every *.md file below the current directory, recursively.
md_loader = DirectoryLoader(".", glob="**/*.md")
# Uses the loader's default splitter, not the `text_splitter` configured above.
md_splits = md_loader.load_and_split()
# Embed the web chunks followed by the Markdown chunks into one vector store.
vectorstore = VSS.from_documents(
    documents=[*web_splits, *md_splits],
    embedding=OpenAIEmbeddings(),
)
# retrieve, vector based
#question = "which version of juju defines juju_unit_status"
#docs = vectorstore.similarity_search(question)
# generate answers; LLMChain could be another choice, one which retains context
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
# Chat model used to phrase the answers; temperature is pinned to 0.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
# Question-answering chain whose retriever is backed by the vector store.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)
def query(question):
    """Run ``question`` through the module-level ``qa_chain``.

    The question is passed to the chain under the ``"query"`` key, and
    whatever the chain call returns is handed back unchanged.
    """
    return qa_chain({"query": question})
# Show the chain's output; without the print the answer is computed and then
# silently dropped when the file is run as a script.
print(query("what is the name of roy's pet"))
