@hoehrmann
Created May 26, 2024 22:07
LlamaIndex RAG CLI with local models
#!/usr/bin/env python
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.cli.rag import RagCLI
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.llms.openai import OpenAI
import chromadb
import rich.traceback
rich.traceback.install(show_locals=True)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en", max_length=512)
# Assumes a local OpenAI-compatible server, e.g. llama.cpp/build/bin/server -m Meta-Llama-3-8B-Instruct.Q4_0.gguf --port 9999
llm = OpenAI(api_base='http://127.0.0.1:9999', api_key='no_key')
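# In-memory docstore for the ingestion pipeline, plus a Chroma collection persisted in the current directory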
docstore = SimpleDocumentStore()
persist_dir = "."
chroma_client = chromadb.PersistentClient(path=persist_dir)
chroma_collection = chroma_client.create_collection("default", get_or_create=True)
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection, persist_dir=persist_dir
)
custom_ingestion_pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=512, chunk_overlap=20),
        embed_model,  # reuse the local bge-small embedding model defined above
    ],
    vector_store=vector_store,
    docstore=docstore,
    cache=IngestionCache(),
)
rag_cli_instance = RagCLI(
    ingestion_pipeline=custom_ingestion_pipeline,
    llm=llm,  # optional
)
if __name__ == "__main__":
    rag_cli_instance.cli()
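
When run, the script exposes the same options as the stock llamaindex-cli rag command (for example --files to ingest documents and --question to query them), since RagCLI.cli() drives that CLI. For reference, here is a minimal sketch of querying the persisted Chroma collection without the CLI; it is not part of the gist and simply reuses the vector_store, embed_model, and llm objects defined above via the standard VectorStoreIndex.from_vector_store and as_query_engine APIs (the question string is purely illustrative):

from llama_index.core import VectorStoreIndex

# Build an index view over the already-populated Chroma collection
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# Answer questions with the local llama.cpp-backed LLM over the retrieved chunks
query_engine = index.as_query_engine(llm=llm)
print(query_engine.query("What do the ingested documents say about caching?"))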