Created
May 19, 2024 08:04
-
-
Save rxerium/5c7921cb248ea11a02f6ea25b8e3a354 to your computer and use it in GitHub Desktop.
A Python script that integrates with any given AI model to summarise YouTube videos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from youtube_transcript_api import YouTubeTranscriptApi

# ID of the YouTube video whose transcript is fetched. It is an empty string in
# the original script, so it must be filled in before running.
VIDEO_ID = ""

# Fetch the transcript; each item iterated below exposes its caption under 'text'.
tc = YouTubeTranscriptApi.get_transcript(VIDEO_ID)

# Write one caption segment per line. The context manager closes (and therefore
# flushes) the file, so the complete transcript is on disk before anything
# reads "transcript.txt" back.
with open("transcript.txt", "w", encoding="utf-8") as file:
    file.writelines(segment['text'] + "\n" for segment in tc)
from collections.abc import Iterator

from langchain_community.llms.ollama import Ollama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers.string import StrOutputParser

# Maximum number of characters sent to the model per request, and how many
# characters of the previous chunk are repeated at the start of the next one.
CHUNK_SIZE = 8192
CHUNK_OVERLAP = 256


def chunk_text(text: str, size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> Iterator[str]:
    """Yield windows of *text* that together cover it completely.

    Each window holds at most *size* characters and begins *overlap*
    characters before the end of the previous window. The first window
    starts at index 0, and nothing is yielded for an empty string.

    Raises:
        ValueError: if *overlap* is negative or not smaller than *size*.
    """
    if overlap < 0 or size <= overlap:
        raise ValueError("size must be greater than overlap, and overlap must be >= 0")
    step = size - overlap
    for start in range(0, len(text), step):
        yield text[start:start + size]
        # Stop once the end is reached; otherwise the next window would
        # consist solely of characters that were already emitted.
        if start + size >= len(text):
            break


# NOTE(review): num_ctx / num_predict presumably set the context window and the
# maximum number of generated tokens - confirm against the Ollama wrapper docs.
llm = Ollama(model="gemma:7b", num_ctx=4096, num_predict=1024)

# NOTE(review): "resume" below may be intended as "summarise"; the prompt text is
# left untouched because changing it changes what the model produces.
prompt = PromptTemplate.from_template("""\
Please resume the below chunk of a transcript and reformat it with proper punctuation, spelling, grammar and capitalization.Respect the original meaning and intent of the speaker, but feel free to make minor adjustments as needed.
{chunk}
""")

# Pipeline: fill the template, call the model, parse the result to a plain string.
chain = prompt | llm | StrOutputParser()

with open("transcript.txt", "r", encoding='utf-8') as input_file:
    input_text = input_file.read()

with open("transcript_output.txt", "w", encoding='utf-8') as summary_file:
    for input_chunk in chunk_text(input_text):
        output_chunk = chain.invoke({'chunk': input_chunk})
        summary_file.write(output_chunk + "\n")
        # Flush after every chunk so finished output reaches the file even if
        # a later model call fails.
        summary_file.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment