Skip to content

Instantly share code, notes, and snippets.

@alevz257
Last active January 8, 2024 08:58
Show Gist options
  • Save alevz257/a6fee484c4478ca76610bcae0af2f0b8 to your computer and use it in GitHub Desktop.
Save alevz257/a6fee484c4478ca76610bcae0af2f0b8 to your computer and use it in GitHub Desktop.
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
#from langchain.vectorstores import Chroma
from langchain_community.vectorstores.pgvector import PGVector
from langchain_community.document_loaders import GCSDirectoryLoader
from langchain_community.llms import VertexAI
from langchain_community.embeddings import VertexAIEmbeddings
from flask import Flask
import os
# Module-level Vertex AI text model handle, configured once at import time.
llm = VertexAI(
    model_name="text-bison@002",
    # Sampling settings: low temperature, with top-k / top-p limits.
    temperature=0.1,
    top_k=40,
    top_p=0.8,
    # Upper bound on the length of each generated answer.
    max_output_tokens=256,
    verbose=True,
)

# Flask application object; route handlers below are registered on it.
app = Flask(__name__)
@app.route('/', methods = ['POST', 'GET'])
def embed():
    """Load documents from GCS, split them into chunks and index them in PGVector.

    Steps performed on every request (GET or POST):

    1. Load documents through ``GCSDirectoryLoader`` from the configured
       project and bucket.
    2. Split them with ``CharacterTextSplitter`` (chunk size 1000,
       overlap 100) and print the number of resulting chunks.
    3. Embed the chunks with ``VertexAIEmbeddings`` and store them via
       ``PGVector.from_documents`` in the ``test_collection`` collection.

    The database connection is built from the ``PGVECTOR_DRIVER``,
    ``PGVECTOR_HOST``, ``PGVECTOR_PORT``, ``PGVECTOR_DATABASE``,
    ``PGVECTOR_USER`` and ``PGVECTOR_PASSWORD`` environment variables, each
    falling back to a built-in default when unset.

    Returns:
        The literal string ``'done embed'`` once indexing has finished.
    """
    REQUESTS_PER_MINUTE = 150
    COLLECTION_NAME = 'test_collection'

    # Load the source documents from the bucket.
    loader = GCSDirectoryLoader(
        project_name="genai-alevz-demo-1",
        bucket="alevz_genai_test_unstructured",
    )
    documents = loader.load()

    # Split into overlapping chunks before embedding.
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    print(len(texts))

    # A single embeddings client is enough; the original built a second,
    # unused instance at the top of the function.
    embeddings = VertexAIEmbeddings(requests_per_minute=REQUESTS_PER_MINUTE)

    # NOTE(review): the fallback host and password below are hard-coded in
    # source. They are kept for backward compatibility, but real deployments
    # should supply these values through the environment instead.
    CONNECTION_STRING = PGVector.connection_string_from_db_params(
        driver=os.environ.get("PGVECTOR_DRIVER", "psycopg2"),
        host=os.environ.get("PGVECTOR_HOST", "34.101.101.119"),
        port=int(os.environ.get("PGVECTOR_PORT", "5432")),
        database=os.environ.get("PGVECTOR_DATABASE", "postgres"),
        user=os.environ.get("PGVECTOR_USER", "postgres"),
        password=os.environ.get("PGVECTOR_PASSWORD", "admin123"),
    )

    # Called for its side effect (embedding and persisting the chunks); the
    # returned store object is not needed here.
    PGVector.from_documents(
        embedding=embeddings,
        documents=texts,
        connection_string=CONNECTION_STRING,
        collection_name=COLLECTION_NAME,
    )
    return 'done embed'
if __name__ == '__main__':
    # The server binds to all interfaces, so the interactive debugger (which
    # allows arbitrary code execution from the browser) must not be enabled
    # by default. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment