Skip to content

Instantly share code, notes, and snippets.

@DarylRodrigo
Last active May 3, 2023 18:34
Show Gist options
  • Save DarylRodrigo/5a564a71b28cd8fa087efe1d95411ea0 to your computer and use it in GitHub Desktop.
Save DarylRodrigo/5a564a71b28cd8fa087efe1d95411ea0 to your computer and use it in GitHub Desktop.
Gist showing how to populate a vector store from Notion and then run inference on it using a chain.
import os

import pinecone
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import NotionDBLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
# Environment Vars
# NOTE: the values below are placeholders. Substitute real credentials
# locally and never commit them to version control.
# The OpenAI key is exported through the environment so the LangChain OpenAI
# wrappers used below (embeddings and LLM) can pick it up.
os.environ["OPENAI_API_KEY"] = "sk-xxx"
NOTION_API_KEY = "xxx"
DATABASE_ID = "xxx"
PINECONE_API_KEY = "xxx"

# Load Notion Documents
# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/notiondb.html?highlight=notion#create-a-notion-integration
loader = NotionDBLoader(NOTION_API_KEY, DATABASE_ID)
notion_docs = loader.load()

# Split text into sections for pinecone
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_notion_docs = text_splitter.split_documents(notion_docs)

# Upload to pinecone - use cosine and 1536 dimensions when setting up db
pinecone.init(api_key=PINECONE_API_KEY, environment="us-west1-gcp-free")
index_name = "pinecone-test"
embeddings = OpenAIEmbeddings()
Pinecone.from_documents(split_notion_docs, embeddings, index_name=index_name)

# Load vector store from pinecone
# (Re-opening the index by name shows how a later run can query the data
# without uploading the documents again.)
docsearch = Pinecone.from_existing_index(index_name, embeddings)

# - Inference
query = "Some question?"

# Get similar docs to question
similar_docs = docsearch.similarity_search(query)

# Run chain with the similar docs supplied as context for the question.
# The LLM must be created explicitly: the original script passed an undefined
# `llm` to `load_qa_chain`, which raised NameError. temperature=0 keeps the
# answer as deterministic as possible for question answering.
llm = OpenAI(temperature=0)
# chain_type="stuff" places all retrieved documents into a single prompt.
chain = load_qa_chain(llm, chain_type="stuff")
answer = chain.run(input_documents=similar_docs, question=query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment