Skip to content

Instantly share code, notes, and snippets.

@hwchase17
Created May 11, 2023 22:39
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop.
Save hwchase17/8bb41c048a6facb881634fc55c54a55d to your computer and use it in GitHub Desktop.
from langchain.document_loaders import YoutubeLoader
from langchain.indexes import VectorstoreIndexCreator
# (video URL, webinar title) pairs; the title is attached to every transcript
# chunk as metadata further down in the script.
urls = [
    ("https://www.youtube.com/watch?v=fP6vRNkNEt0", "Prompt Injection"),
    ("https://www.youtube.com/watch?v=qWv2vyOX0tk", "Low Code-No Code"),
    ("https://www.youtube.com/watch?v=k8GNCCs16F4", "Agents In Production"),
    ("https://www.youtube.com/watch?v=1gRlCjy18m4", "Agents"),
    ("https://www.youtube.com/watch?v=fLn-WqliEQU", "Output Parsing"),
    ("https://www.youtube.com/watch?v=ywT-5yKDtDg", "Document QA"),
    ("https://www.youtube.com/watch?v=GrCFyyyAxCU", "SQL"),
    ("https://www.youtube.com/watch?v=AKsfHK_4tf4", "Chat Documents with JS"),
]
# Load the transcript documents for every video listed in `urls` and tag each
# one with its webinar title under the "name" metadata key.
docs = []
for url, title in urls:
    # add_video_info=False: do not request the additional video details.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    for doc in loader.load():
        doc.metadata["name"] = title
        docs.append(doc)
from langchain.schema import Document
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
# Create the embedding model, re-chunk the loaded documents
# (chunk_size=500) and index the chunks in a Chroma vector store.
embeddings = CohereEmbeddings()
splitter = RecursiveCharacterTextSplitter(chunk_size=500)
docs = splitter.split_documents(docs)
vectorstore = Chroma.from_documents(docs, embeddings)
from langchain.llms import OpenAI
# Baseline: question answering over the vector store's default retriever.
llm = OpenAI(temperature=0)
vectorstore_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
)
vectorstore_chain.run(
    "what did they say about prompt injection in the agents in production webinar?"
)
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
# Free-text description of the indexed documents, passed to the self-query
# retriever together with the attribute list below.
document_content_description = "excerpts from langchain webinars"

# The only metadata attribute on the documents is "name"; its description
# enumerates the titles taken from `urls`.
name_attribute = AttributeInfo(
    name="name",
    description=f"The name of the video, should be one of: {[t for _, t in urls]}",
    type="string or list[string]",
)
metadata_field_info = [name_attribute]
# Same question as the baseline chain, but answered through a self-query
# retriever built from the LLM, the vector store and the metadata description.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    verbose=True,
)
chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
)
chain.run(
    "what did they say about prompt injection in the agents in production webinar?"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment