from langchain.document_loaders import YoutubeLoader

# (url, title) pairs for the LangChain webinar recordings to index
urls = [
("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]

# Load each transcript and tag every document with the video title so it can be
# filtered on later by the self-query retriever
docs = []
for url, title in urls:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    _docs = loader.load()
    for d in _docs:
        d.metadata["name"] = title
        docs.append(d)

from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Split the transcripts into ~500-character chunks and embed them into Chroma
embeddings = CohereEmbeddings()
docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)
vectorstore = Chroma.from_documents(docs, embeddings)
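# Optional (a sketch, not in the original gist): pass persist_directory so the Chroma
# index is written to disk and can be reloaded later without re-embedding, e.g.
# vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_webinars")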

from langchain.llms import OpenAI

# Baseline: a RetrievalQA chain over a plain similarity-search retriever
llm = OpenAI(temperature=0)
vectorstore_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
vectorstore_chain.run("what did they say about prompt injection in the agents in production webinar?")
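# Note (added): the baseline retriever ranks chunks purely by embedding similarity,
# so nothing guarantees the answer is drawn from the "Agents In Production" transcript.
# The self-query retriever below adds that metadata constraint automatically.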

from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Describe the "name" metadata field so the LLM can translate a question into a
# structured query: a semantic search term plus a filter on the video title
metadata_field_info = [
    AttributeInfo(
        name="name",
        description=f"The name of the video, should be one of: {[t for _, t in urls]}",
        type="string or list[string]",
    ),
]
document_content_description = "excerpts from langchain webinars"
retriever = SelfQueryRetriever.from_llm(
    llm, vectorstore, document_content_description, metadata_field_info, verbose=True
)
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
chain.run("what did they say about prompt injection in the agents in production webinar?")
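
# Sketch (not in the original gist): the retriever can also be queried directly to
# inspect which chunks survive the metadata filter; the query below is just an example.
relevant_docs = retriever.get_relevant_documents(
    "what did they say about prompt injection in the agents in production webinar?"
)
print(sorted({d.metadata["name"] for d in relevant_docs}))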