Last active
May 4, 2025 00:48
-
-
Save ArmaanjeetSandhu/4c23149145b7f2f23870c1294ef40623 to your computer and use it in GitHub Desktop.
Organize and format YouTube video transcripts using LangChain
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the transcript of a YouTube video, ask a chat model to organize and
# format it, and save the result to transcript.txt in the working directory.
from langchain_community.document_loaders import YoutubeLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Chat model (library defaults) and the parser placed at the end of the chain.
model = ChatOpenAI()
parser = StrOutputParser()

# Trim stray whitespace/newlines that commonly come along with a pasted URL.
url = input("Enter the YouTube video URL: ").strip()

loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
documents = loader.load()

# Fail with a readable message instead of an IndexError when the loader
# returns nothing (e.g. the video exposes no transcript).
if not documents:
    raise SystemExit("No transcript could be loaded for that URL.")
transcript = documents[0].page_content

# The transcript is passed as a template variable, fenced so the model can
# tell the instructions apart from the text it has to reformat.
template = PromptTemplate(
    template="""Organize and format the following text properly:\n```\n{transcript}\n```\nDon't omit anything.""",
    input_variables=["transcript"],
    validate_template=True,
)

# prompt -> model -> parser
chain = template | model | parser
output = chain.invoke({"transcript": transcript})

# Explicit UTF-8 so non-ASCII characters in the transcript do not depend on
# the platform's default encoding when writing the file.
with open("transcript.txt", "w", encoding="utf-8") as file:
    file.write(output)

print("The transcript has been written to transcript.txt.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment