Skip to content

Instantly share code, notes, and snippets.

@rxerium
Created May 19, 2024 08:04
Show Gist options
  • Save rxerium/5c7921cb248ea11a02f6ea25b8e3a354 to your computer and use it in GitHub Desktop.
Save rxerium/5c7921cb248ea11a02f6ea25b8e3a354 to your computer and use it in GitHub Desktop.
A Python script that integrates with any given AI model to summarise YouTube videos
"""Download a YouTube transcript and have a local LLM rewrite it chunk by chunk.

Usage: run the script with the video ID as the first command-line argument
(or set ``VIDEO_ID`` below). The raw transcript is written to
``transcript.txt`` and the model output to ``transcript_output.txt``.
"""
import sys
from typing import Iterator

# Fallback video ID used when none is given on the command line.
VIDEO_ID = ""
TRANSCRIPT_PATH = "transcript.txt"
OUTPUT_PATH = "transcript_output.txt"

# Chunking is done in characters, not tokens.
CHUNK_SIZE = 8192
CHUNK_OVERLAP = 256

# NOTE(review): "resume" presumably means "summarise" here -- confirm with the
# author before changing the wording, since it alters what the model is asked.
PROMPT_TEMPLATE = """\
Please resume the below chunk of a transcript and reformat it with proper punctuation, spelling, grammar and capitalization.Respect the original meaning and intent of the speaker, but feel free to make minor adjustments as needed.
{chunk}
"""


def split_into_chunks(
    text: str, size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP
) -> Iterator[str]:
    """Yield consecutive windows of *text* that overlap by *overlap* characters.

    Every window holds at most *size* characters and starts ``size - overlap``
    characters after the previous one, so the whole text is covered from its
    first to its last character. Empty text yields nothing.

    Raises:
        ValueError: if *size* is not positive or *overlap* is not in
            ``[0, size)``.
    """
    if size <= 0:
        raise ValueError("size must be positive")
    if not 0 <= overlap < size:
        raise ValueError("overlap must satisfy 0 <= overlap < size")

    step = size - overlap
    for start in range(0, len(text), step):
        yield text[start:start + size]
        # Stop once a window reaches the end of the text; a further window
        # would only repeat characters already emitted as overlap.
        if start + size >= len(text):
            break


def fetch_transcript(video_id: str, path: str = TRANSCRIPT_PATH) -> None:
    """Fetch the transcript of *video_id* and write it to *path*, one segment per line."""
    # Imported lazily so the pure helpers in this module remain usable
    # without the third-party package installed.
    from youtube_transcript_api import YouTubeTranscriptApi

    segments = YouTubeTranscriptApi.get_transcript(video_id)
    # The context manager closes (and therefore flushes) the file before it
    # is read back later in the run.
    with open(path, "w", encoding="utf-8") as transcript_file:
        transcript_file.writelines(segment["text"] + "\n" for segment in segments)


def build_chain():
    """Return the prompt -> Ollama model -> string-parser pipeline."""
    from langchain.prompts import PromptTemplate
    from langchain_community.llms.ollama import Ollama
    from langchain_core.output_parsers.string import StrOutputParser

    llm = Ollama(model="gemma:7b", num_ctx=4096, num_predict=1024)
    prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)
    return prompt | llm | StrOutputParser()


def rewrite_transcript(
    chain, input_path: str = TRANSCRIPT_PATH, output_path: str = OUTPUT_PATH
) -> None:
    """Run every chunk of *input_path* through *chain*, appending results to *output_path*."""
    with open(input_path, "r", encoding="utf-8") as input_file:
        input_text = input_file.read()

    with open(output_path, "w", encoding="utf-8") as summary_file:
        for input_chunk in split_into_chunks(input_text):
            output_chunk = chain.invoke({"chunk": input_chunk})
            summary_file.write(output_chunk + "\n")
            # Flush after each chunk so finished output is on disk even if a
            # later model call fails.
            summary_file.flush()


def main(argv=None) -> None:
    """Entry point: fetch the transcript, then rewrite it with the model.

    *argv* defaults to the process's command-line arguments (without the
    program name); its first element, if present, overrides ``VIDEO_ID``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    video_id = args[0] if args else VIDEO_ID
    if not video_id:
        raise SystemExit("usage: pass a YouTube video ID as the first argument")

    fetch_transcript(video_id)
    rewrite_transcript(build_chain())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment