Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save diyism/631b44016dcc05d8b71dfd9ee22d9baf to your computer and use it in GitHub Desktop.
Save diyism/631b44016dcc05d8b71dfd9ee22d9baf to your computer and use it in GitHub Desktop.
youtube summarize
# Install dependencies: "pip install youtube_transcript_api langchain openai tiktoken"
# Run with "OPENAI_API_KEY=xxxx python summarize.py <youtube_id>"
import sys
from youtube_transcript_api import YouTubeTranscriptApi
from langchain import OpenAI, PromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
srt = YouTubeTranscriptApi.get_transcript(sys.argv[1])
captions = [c['text'] for c in srt]
text = ' '.join(captions)
llm = OpenAI(temperature=0, max_tokens=1000)
text_splitter = TokenTextSplitter(chunk_size=2000, chunk_overlap=100)
docs = text_splitter.create_documents([text])
prompt_template = """Summarize in 10 bullet points the following presentation:
{text}"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
chain = load_summarize_chain(llm, chain_type="map_reduce", combine_prompt=PROMPT)
# run langchain chain and print results
result = chain.run(docs)
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment