Skip to content

Instantly share code, notes, and snippets.

Created March 6, 2024 20:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikesparr/5833b2df9c9ed5db4ccd34b8cd4d31bf to your computer and use it in GitHub Desktop.
Save mikesparr/5833b2df9c9ed5db4ccd34b8cd4d31bf to your computer and use it in GitHub Desktop.
Experiment using LangChain, OpenAI, and Streamlit, with FAISS as a CPU-based vector store, that analyzes YouTube transcripts and answers questions about them
#!/usr/bin/env bash
# Setup and run steps for the YouTube assistant experiment.
# Based on a LangChain tutorial; some of its code was deprecated, so it is
# fixed along the way.

# create folder
mkdir youtube-assistant
cd youtube-assistant

# set up virtual env
python3 -m venv .venv
source .venv/bin/activate

# install dependencies
pip3 install -U python-dotenv langchain langchain-community langchain-openai openai youtube-transcript-api streamlit faiss-cpu

# create .env (replace the placeholder with a real key)
echo "OPENAI_API_KEY=\"sk-YOURKEYHERE\"" > .env

# NOTE: this script does not create the Python files; save them into this
# folder before running the two commands below.

# test the helper
# A bare `python3` would only open an interactive interpreter, so name the
# helper module explicitly (the app imports it as `langchain_helper`).
python3 langchain_helper.py # prints object ID (hoping for no errors, warnings expected)

# run the chat bot using streamlit
# NOTE(review): `streamlit run` needs a target script; the app's filename is
# not shown here, so main.py is an assumption -- adjust to match your file.
streamlit run main.py
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
# Load variables from the local .env file (where the setup step writes
# OPENAI_API_KEY) into the environment before any OpenAI client is built.
# Without this call the imported load_dotenv is never used and the key in
# .env is never read.
load_dotenv()

# Module-level embedding model shared by the vector-store builder below.
embeddings = OpenAIEmbeddings()
def create_vector_db_from_youtube_url(video_url: str) -> FAISS:
    """Build a FAISS vector store from a YouTube video's transcript.

    The transcript is loaded with ``YoutubeLoader``, split into chunks of
    1000 characters with a 100-character overlap, and the chunks are embedded
    with the module-level ``embeddings`` object.

    Args:
        video_url: URL of the YouTube video whose transcript is indexed.

    Returns:
        The FAISS store built from the transcript chunks.
    """
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript_docs)
    return FAISS.from_documents(chunks, embeddings)
def get_response_from_query(db, query, k=4):
    """Answer a question about a video from its most relevant transcript chunks.

    Args:
        db: Vector store exposing ``similarity_search`` (e.g. the FAISS store
            returned by ``create_vector_db_from_youtube_url``).
        query: The user's question about the video.
        k: Number of transcript chunks to retrieve and place in the prompt.

    Returns:
        A ``(response, docs)`` tuple: the model's answer with newline
        characters removed, and the retrieved documents.
    """
    # Every retrieved chunk is pasted into the prompt, so k is kept small to
    # stay within the completion model's context window.
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    llm = OpenAI(model="gpt-3.5-turbo-instruct")

    # The template text is kept at column 0 so that no indentation leaks into
    # the prompt sent to the model.
    prompt = PromptTemplate(
        input_variables=["question", "docs"],
        template="""
You are a helpful YouTube assistant that can answer questions about videos based on the video's transcript.
Answer the following question: {question}
By searching the following video transcript: {docs}
Only use the factual information from the transcript to answer the question.
If you feel like you don't have enough information to answer the question, say "I don't know".
Your answers should be detailed.
""",
    )

    chain = LLMChain(llm=llm, prompt=prompt)
    # Fill both template variables and run the chain to get the answer text.
    response = chain.run(question=query, docs=docs_page_content)
    # Flatten the answer to a single line; the caller re-wraps it for display.
    response = response.replace("\n", "")
    return response, docs
if __name__ == "__main__":
    # Manual smoke test: build the vector store for one video and print it.
    import sys

    print("Testing ...")
    # The built-in URL is an empty placeholder; pass a real video URL as the
    # first command-line argument to exercise the loader. With no argument
    # the behavior is unchanged.
    video_url = sys.argv[1] if len(sys.argv) > 1 else ""
    result = create_vector_db_from_youtube_url(video_url)
    print(result) # just prints object ID to confirm it created it
import langchain_helper as lch
import streamlit as st
import textwrap
# Streamlit front end: collects a video URL and a question in a sidebar form,
# then shows the answer produced by the langchain_helper module.
st.title("YouTube Assistant")

with st.sidebar:
    with st.form(key='my_form'):
        # Create the inputs through the active form context rather than
        # through `st.sidebar.` directly, so they belong to the form and are
        # sent together when Submit is pressed.
        youtube_url = st.text_area(
            label="What is the YouTube video URL?",
        )
        query = st.text_area(
            label="Ask me about the video?",
        )
        st.form_submit_button(label="Submit")

# Only query the model once both fields have been filled in.
if query and youtube_url:
    db = lch.create_vector_db_from_youtube_url(youtube_url)
    response, docs = lch.get_response_from_query(db, query)
    # The helper returns the answer as one line; wrap it for display.
    st.text(textwrap.fill(response, width = 80))
Copy link

Generative AI (GenAI) YouTube Video Helper

This example, from the second part of a LangChain tutorial, shows how to load the transcript of a YouTube video, split it into chunks, embed and store those chunks in a FAISS vector store, and then answer questions about the video using only the transcript text.


Valid query

Screenshot 2024-03-06 at 1 11 51 PM

Invalid query

Screenshot 2024-03-06 at 1 12 20 PM

Test with a different video, on Kong API Gateways

Screenshot 2024-03-06 at 1 15 14 PM
Screenshot 2024-03-06 at 1 19 46 PM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment