Ask questions about a URL using Llama 2 running locally
# Ask questions about a document using Ollama and LangChain.
# This uses a local LLM to provide insights into any document.
# It's far from perfect, as its power is limited by the size of the LLM.
# It uses the langchain library for document loading, text splitting,
# and embedding generation, and the Ollama client for generating answers.
# You need Ollama installed, with the llama2:13b model available. See https://github.com/jmorganca/ollama
# Script based on https://github.com/jmorganca/ollama/blob/main/docs/tutorials/langchainpy.md
# Dependencies:
#   pip install langchain bs4 gpt4all chromadb
# Usage:
#   python ollama-langchain.py [URL]
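# For example, with a placeholder URL:
#   python ollama-langchain.py https://example.com/some-article.html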
system_prompt = """
Answer based only on the provided document, unless otherwise stated. If you don't know the answer, say "I don't know".
"""
# Init the Ollama client
from langchain.llms import Ollama
ollama = Ollama(base_url="http://localhost:11434", model="llama2:13b", temperature=0)
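# (temperature=0 keeps answers as deterministic as possible;
# 11434 is Ollama's default local port)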
# Load the document from the command-line argument.
# Only URLs are supported at the moment; local file paths are rejected.
import sys
from langchain.document_loaders import WebBaseLoader
if len(sys.argv) < 2:
    sys.exit("Usage: python ollama-langchain.py [URL]")
file_or_url = sys.argv[1]
print("Loading from", file_or_url)
if file_or_url.startswith("http"):
    loader = WebBaseLoader(web_path=[file_or_url])
else:
    raise Exception("Only URLs are supported right now")
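# Local file support could be added here; a minimal, untested sketch
# using langchain's TextLoader for plain-text files:
#     from langchain.document_loaders import TextLoader
#     loader = TextLoader(file_or_url)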
data = loader.load()
# Split the text into chunks
# See https://github.com/langchain-ai/langchain/issues/2026
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
all_splits = text_splitter.split_documents(data)
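# Small chunks (400 characters, 10 overlap) make retrieval more precise,
# at the cost of less context per retrieved chunk.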
# Generate embeddings
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())
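# The index is rebuilt on every run. To reuse it across runs, Chroma can
# persist to disk; a sketch, with an arbitrary directory name:
#     vectorstore = Chroma.from_documents(documents=all_splits,
#         embedding=GPT4AllEmbeddings(), persist_directory="./chroma_db")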
# Build the retrieval QA chain once, outside the loop; its retriever
# gathers the relevant parts of the document for each question
from langchain.chains import RetrievalQA
qachain = RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
while True:
    # Ask for input and send the question to the model
    question = input("> ")
    response = qachain({"query": system_prompt + question})
    print(response["result"])
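# Press Ctrl-C to exit.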