Skip to content

Instantly share code, notes, and snippets.

@ggarber
Created May 13, 2023 13:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ggarber/a2e6c68567a4c2abbbf488c37f4a99a3 to your computer and use it in GitHub Desktop.
Save ggarber/a2e6c68567a4c2abbbf488c37f4a99a3 to your computer and use it in GitHub Desktop.
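# Install dependencies: "pip install youtube_transcript_api langchain openai tiktoken"
# (langchain's OpenAI wrapper needs the "openai" package; TokenTextSplitter needs "tiktoken")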
# Run with "OPENAI_API_KEY=xxxx python summarize.py <youtube_id>"
import sys
from youtube_transcript_api import YouTubeTranscriptApi
from langchain import OpenAI, PromptTemplate
from langchain.text_splitter import TokenTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
# Shown (and used as the exit message) when the script is run without a video id.
USAGE = 'Usage: OPENAI_API_KEY=xxxx python summarize.py <youtube_id>'

# Prompt used for the final "combine" step of the map-reduce summarize chain;
# {text} is filled in by the chain.
prompt_template = """Summarize in 10 bullet points the following presentation:
{text}"""


def video_id_from_args(argv):
    """Return the YouTube video id given as the first command-line argument.

    Args:
        argv: Argument list in ``sys.argv`` form (program name first). Any
            arguments after the video id are ignored.

    Returns:
        The video id string, exactly as supplied.

    Raises:
        SystemExit: With a usage message when no video id was supplied.
    """
    if len(argv) < 2:
        raise SystemExit(USAGE)
    return argv[1]


def captions_to_text(entries):
    """Join the ``'text'`` field of every transcript entry with single spaces.

    Args:
        entries: Iterable of mappings, each with a ``'text'`` key.

    Returns:
        One string containing all caption texts; ``''`` for no entries.
    """
    return ' '.join(entry['text'] for entry in entries)


def summarize(video_id):
    """Fetch the transcript of *video_id* and return its summary.

    The transcript is split into token-based chunks and run through a
    LangChain "map_reduce" summarize chain whose combine step uses
    ``prompt_template``.

    Raises:
        SystemExit: When the fetched transcript contains no text.
    """
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    text = captions_to_text(srt)
    if not text.strip():
        # Nothing to summarize; avoid sending an empty prompt to the LLM.
        raise SystemExit(f'No transcript text found for video {video_id!r}')

    llm = OpenAI(temperature=0, max_tokens=1000)
    text_splitter = TokenTextSplitter(chunk_size=2000, chunk_overlap=100)
    docs = text_splitter.create_documents([text])

    PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="map_reduce", combine_prompt=PROMPT)
    # run langchain chain and hand the result back to the caller
    return chain.run(docs)


def main():
    """Summarize the video named on the command line and print the result."""
    print(summarize(video_id_from_args(sys.argv)))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment