Skip to content

Instantly share code, notes, and snippets.

@mikeplavsky
Created June 5, 2023 14:26
Show Gist options
  • Save mikeplavsky/41bb47dfbb7dafdb8d05f9b789e700ed to your computer and use it in GitHub Desktop.
Save mikeplavsky/41bb47dfbb7dafdb8d05f9b789e700ed to your computer and use it in GitHub Desktop.
Chatting about YouTube video transcripts
#%%
import os
import openai
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import TokenTextSplitter
# Read settings from a .env file; override=True lets values from the file
# take precedence over variables already present in the environment.
load_dotenv(override=True)
# Configure the module-level openai client for Azure: the API type and
# version are fixed here, the endpoint and key are taken from the environment.
openai.api_type = "azure"
openai.api_version = "2023-03-15-preview"
openai.api_base = os.environ.get('OPENAI_API_BASE')
openai.api_key = os.environ.get("OPENAI_API_KEY")
#%%
from langchain.document_loaders import YoutubeLoader
import sys
# The video URL is the first command-line argument. Stop with a usage
# message instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <youtube-url>")
url = sys.argv[1]
loader = YoutubeLoader.from_youtube_url(url)
# Only the first loaded document is used as the transcript. Guard against
# an empty result so the failure is reported clearly rather than on [0].
documents = loader.load()
if not documents:
    sys.exit(f"No transcript could be loaded for {url}")
transcript = documents[0]
#%%
# Break the transcript text into chunks (chunk_size=4000, chunk_overlap=100,
# measured with the gpt-3.5-turbo tokenizer) so each piece can be embedded
# and retrieved separately.
splitter = TokenTextSplitter(
    model_name='gpt-3.5-turbo',
    chunk_size=4000,
    chunk_overlap=100,
)
transcript_text = transcript.page_content
docs = splitter.split_text(transcript_text)
#%%
# Embedding client for the transcript chunks, bound to the "Embeddings"
# deployment of text-embedding-ada-002.
# NOTE(review): chunk_size=1 presumably works around a one-input-per-request
# limit on the Azure embeddings endpoint - confirm before raising it.
embedding_options = {
    "deployment": "Embeddings",
    "model": "text-embedding-ada-002",
    "chunk_size": 1,
}
embeddings = OpenAIEmbeddings(**embedding_options)
# %%
from langchain.vectorstores import Chroma
# Index the transcript chunks in a Chroma vector store using the embedding
# client created earlier in the script.
docsearch = Chroma.from_texts(docs, embeddings)
# %%
# Expose the store as a retriever; k=2 is passed through as a search option.
search_options = {"k": 2}
retriever = docsearch.as_retriever(search_kwargs=search_options)
# %%
# Chat model used to answer questions. temperature=0 and verbose=False are
# passed as-is to the "Gpt35" Azure deployment.
# NOTE(review): the endpoint is hard-coded here while the openai module
# configuration reads OPENAI_API_BASE from the environment - confirm whether
# the two are meant to point at different Azure resources.
llm_options = {
    "openai_api_base": 'https://druid.openai.azure.com/',
    "deployment_name": 'Gpt35',
    "openai_api_type": 'azure',
    "temperature": 0,
    "verbose": False,
}
llm = AzureChatOpenAI(**llm_options)
# %%
from langchain.chains import RetrievalQA
# Assemble the question-answering chain from the chat model and the
# retriever, using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=False,
)
# %%
# Interactive question loop: read a question, run it through the QA chain
# and print the answer. Typing "exit", sending EOF, or pressing Ctrl-C at the
# prompt ends the session.
while True:
    try:
        query = input("\nQ: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C at the prompt: leave without a traceback.
        print()
        break
    if query == "exit":
        break
    if not query:
        # Nothing was typed; prompt again instead of sending an empty question.
        continue
    res = qa(query)
    answer = res['result']
    print(answer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment