Skip to content

Instantly share code, notes, and snippets.

@orcaman
Last active May 9, 2023 16:23
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save orcaman/6f5be32ad9fb919d56f19d7b88fc91e6 to your computer and use it in GitHub Desktop.
Save orcaman/6f5be32ad9fb919d56f19d7b88fc91e6 to your computer and use it in GitHub Desktop.
LangChain Retrieval Question/Answering
import os
import sys
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import WikipediaAPIWrapper
import dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI
from langchain.chains import RetrievalQA
import langchain
def api_client() -> WikipediaAPIWrapper:
    """Create a fresh ``WikipediaAPIWrapper`` using its default configuration."""
    client = WikipediaAPIWrapper()
    return client
def get_wikipedia_search_term_from_command_line():
    """Return the Wikipedia search term given as the first CLI argument.

    The value is returned exactly as typed (no stripping or splitting), so the
    caller decides how to interpret comma-separated terms.

    Returns:
        The raw string found in ``sys.argv[1]``.

    Raises:
        SystemExit: with status 1 when the argument is absent or contains only
            whitespace.
    """
    # A whitespace-only argument is as unusable as a missing one: it would be
    # forwarded to Wikipedia as an empty search.
    if len(sys.argv) < 2 or not sys.argv[1].strip():
        # Diagnostics belong on stderr so stdout only ever carries the answer.
        print("Missing argument: wikipedia search term", file=sys.stderr)
        sys.exit(1)
    return sys.argv[1]
def load_wikipedia_page(wikipedia_search_term: str) -> list[langchain.schema.Document]:
    """Load the documents that the Wikipedia wrapper finds for a search term.

    Args:
        wikipedia_search_term: Text handed to the wrapper's ``load`` method.

    Returns:
        Whatever ``api_client().load(...)`` produces for the term.
    """
    wikipedia = api_client()
    return wikipedia.load(wikipedia_search_term)
def split_documents_into_chunks(documents, chunk_size=800, chunk_overlap=0):
    """Split documents into smaller pieces with a ``CharacterTextSplitter``.

    Args:
        documents: Documents to split; passed straight to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return CharacterTextSplitter(**splitter_options).split_documents(documents)
def get_query_from_command_line():
    """Return the question given as the second CLI argument.

    Returns:
        The raw string found in ``sys.argv[2]``.

    Raises:
        SystemExit: with status 1 when the argument is absent or contains only
            whitespace.
    """
    # A whitespace-only question cannot be answered, so treat it as missing.
    if len(sys.argv) < 3 or not sys.argv[2].strip():
        # Diagnostics belong on stderr so stdout only ever carries the answer.
        print("Missing argument: query", file=sys.stderr)
        sys.exit(1)
    return sys.argv[2]
def load_environment_variables():
    """Populate the environment by delegating to ``dotenv.load_dotenv()``.

    Called first in ``main`` so that settings such as ``OPENAI_API_KEY`` can
    come from a dotenv file rather than the shell.
    """
    dotenv.load_dotenv()
def prepare_model_embedding(texts):
    """Index the given chunks in Chroma and wrap them in a ``RetrievalQA`` chain.

    Args:
        texts: Document chunks passed to ``Chroma.from_documents``.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with an ``OpenAI``
        LLM and the Chroma store's retriever.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is not set in the environment.
    """
    api_key = os.environ['OPENAI_API_KEY']
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    vector_store = Chroma.from_documents(texts, embedder)
    llm = OpenAI()
    retriever = vector_store.as_retriever()
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
def main():
    """Answer a question about one or more Wikipedia pages from the command line.

    Reads the search term(s) from the first CLI argument (comma-separated when
    several pages are wanted) and the question from the second, loads and
    chunks the matching pages, builds the retrieval chain, and prints the
    answer to stdout.
    """
    load_environment_variables()
    wikipedia_search_term = get_wikipedia_search_term_from_command_line()
    # Read the query before any network work: a missing argument should abort
    # immediately, not after pages have been downloaded and embedded.
    query = get_query_from_command_line()

    # str.split(',') returns a one-element list when there is no comma, so a
    # single loop covers both the single-term and multi-term cases.
    documents = []
    for raw_term in wikipedia_search_term.split(','):
        term = raw_term.strip()
        # Skip empty pieces (e.g. a trailing comma) instead of searching for "".
        if term:
            documents.extend(load_wikipedia_page(term))

    texts = split_documents_into_chunks(documents)
    chain = prepare_model_embedding(texts)

    print('\n\n\n\n\n-----------------')
    print('wikipedia search terms:', wikipedia_search_term)
    print('question:', query)
    print('answer:', chain.run(query))
    print('-----------------\n\n')
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
@orcaman
Copy link
Author

orcaman commented May 9, 2023

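To try it (the first argument is one or more comma-separated Wikipedia search terms; the second is the question to answer):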

python wikipedia_learner.py 'Oded Menashe, Eden Harel' 'When was Oded Menashe, the Israeli presenter, born? Who is he married to?'

@orcaman
Copy link
Author

orcaman commented May 9, 2023

requirements.txt:

openai==0.27.6
langchain==0.0.161
python-dotenv==1.0.0
nltk==3.8.1
unstructured==0.6.3
pdfminer.six==20221105
chromadb==0.3.21
tiktoken==0.3.3
wikipedia==1.4.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment