from langchain.document_loaders import YoutubeLoader

# (url, title) pairs for the LangChain webinar recordings to index
urls = [
("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]

# Load each transcript and tag every document with the video title so it can be
# filtered on later by the self-query retriever
docs = []
for url, title in urls:
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    _docs = loader.load()
    for d in _docs:
        d.metadata["name"] = title
        docs.append(d)

from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

# Split the transcripts into ~500-character chunks and embed them into Chroma
embeddings = CohereEmbeddings()
docs = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(docs)
vectorstore = Chroma.from_documents(docs, embeddings)
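# Optional (a sketch, not in the original gist): pass persist_directory so the Chroma
# index is written to disk and can be reloaded later without re-embedding, e.g.
# vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_webinars")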

from langchain.llms import OpenAI

# Baseline: a RetrievalQA chain over a plain similarity-search retriever
llm = OpenAI(temperature=0)
vectorstore_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())
vectorstore_chain.run("what did they say about prompt injection in the agents in production webinar?")
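# Note (added): the baseline retriever ranks chunks purely by embedding similarity,
# so nothing guarantees the answer is drawn from the "Agents In Production" transcript.
# The self-query retriever below adds that metadata constraint automatically.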

from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Describe the "name" metadata field so the LLM can translate a question into a
# structured query: a semantic search term plus a filter on the video title
metadata_field_info = [
    AttributeInfo(
        name="name",
        description=f"The name of the video, should be one of: {[t for _, t in urls]}",
        type="string or list[string]",
    ),
]
document_content_description = "excerpts from langchain webinars"
retriever = SelfQueryRetriever.from_llm(
    llm, vectorstore, document_content_description, metadata_field_info, verbose=True
)
chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
chain.run("what did they say about prompt injection in the agents in production webinar?")
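
# Sketch (not in the original gist): the retriever can also be queried directly to
# inspect which chunks survive the metadata filter; the query below is just an example.
relevant_docs = retriever.get_relevant_documents(
    "what did they say about prompt injection in the agents in production webinar?"
)
print(sorted({d.metadata["name"] for d in relevant_docs}))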