Forked from ggarber/gist:a2e6c68567a4c2abbbf488c37f4a99a3
Last active
May 13, 2023 17:49
-
-
Save diyism/631b44016dcc05d8b71dfd9ee22d9baf to your computer and use it in GitHub Desktop.
youtube summarize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarize a YouTube video's transcript into 10 bullet points using an OpenAI LLM.
#
# Install dependencies: "pip install youtube_transcript_api langchain"
# Run with: "OPENAI_API_KEY=xxxx python summarize.py <youtube_id>"
import sys

from youtube_transcript_api import YouTubeTranscriptApi
from langchain import OpenAI
from langchain.text_splitter import TokenTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

# Prompt used for the final "combine" step of the map_reduce chain; {text} is
# filled in by the chain.
PROMPT_TEMPLATE = """Summarize in 10 bullet points the following presentation:
{text}"""


def fetch_transcript_text(video_id):
    """Return the captions of *video_id* joined into one space-separated string."""
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    return ' '.join(c['text'] for c in srt)


def summarize(text):
    """Split *text* into token chunks and return the map_reduce summary.

    The text is cut into 2000-token chunks overlapping by 100 tokens, each
    chunk is processed by the chain, and the partial results are combined
    with PROMPT_TEMPLATE.
    """
    llm = OpenAI(temperature=0, max_tokens=1000)
    text_splitter = TokenTextSplitter(chunk_size=2000, chunk_overlap=100)
    docs = text_splitter.create_documents([text])
    prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["text"])
    chain = load_summarize_chain(llm, chain_type="map_reduce", combine_prompt=prompt)
    # run langchain chain and hand the result back to the caller
    return chain.run(docs)


def main(argv):
    """Script entry point; returns the process exit status.

    argv[1] must be the YouTube video id. Extra arguments are ignored, as in
    the original script.
    """
    if len(argv) < 2:
        # Fail with a usage hint instead of a bare IndexError.
        print("Usage: OPENAI_API_KEY=xxxx python summarize.py <youtube_id>",
              file=sys.stderr)
        return 2

    text = fetch_transcript_text(argv[1])
    if not text.strip():
        # Nothing to summarize; avoid calling the LLM chain with no documents.
        print("Transcript is empty; nothing to summarize.", file=sys.stderr)
        return 1

    print(summarize(text))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment