Skip to content

Instantly share code, notes, and snippets.

@AntonioRossi
Forked from peterw/embed.py
Created April 18, 2023 10:30
Show Gist options
  • Save AntonioRossi/cf0d5c755fea366a0650fa22ee6e72a8 to your computer and use it in GitHub Desktop.
Save AntonioRossi/cf0d5c755fea366a0650fa22ee6e72a8 to your computer and use it in GitHub Desktop.
embedding the pdf
import openai
import streamlit as st
from streamlit_chat import message
from dotenv import load_dotenv
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import openai
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredPDFLoader
# Build a persistent Chroma vector index for 'ai_paper.pdf' the first time
# this script runs; later runs find the persist directory and skip the work.

load_dotenv()  # merge variables from a local .env file (if any) into os.environ

# The API key must come from the environment / .env file only. A literal key
# used to be embedded here as a fallback: secrets must never be committed to
# source, and that key should be treated as compromised and revoked.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    raise RuntimeError(
        'OPENAI_API_KEY is not set; export it or add it to a .env file'
    )

persist_directory = 'ai_paper1'
embeddings = OpenAIEmbeddings()

# The existence of the persist directory is the "already embedded" marker,
# so the PDF is parsed and embedded only when that directory is missing.
if not os.path.exists(persist_directory):
    print('embedding the document now')
    loader = UnstructuredPDFLoader('ai_paper.pdf', mode="elements")
    pages = loader.load_and_split()
    vectordb = Chroma.from_documents(
        documents=pages,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Flush the freshly built index to disk under persist_directory.
    vectordb.persist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment