Skip to content

Instantly share code, notes, and snippets.

@RoseSecurity
Last active February 3, 2024 23:45
Show Gist options
  • Save RoseSecurity/7c4f03a85aa25bad9ef10b8ce45f5bae to your computer and use it in GitHub Desktop.
Save RoseSecurity/7c4f03a85aa25bad9ef10b8ce45f5bae to your computer and use it in GitHub Desktop.
Don't have time to watch tutorials and technical videos? Need to quickly identify useful information? This script harnesses machine learning to summarize YouTube videos.
#!/usr/bin/env python3
import os
import argparse
import whisper
from pytube import YouTube
from transformers import pipeline
# Harness machine learning to summarize YouTube video transcriptions
# Usage: python3 summarizer.py -u <URL>
class Font:
    """ANSI escape sequences used to style the script's terminal output."""

    # Foreground colours.
    GREEN = "\033[92m"
    RED = "\033[91m"

    # Text attribute.
    ITALIC = "\033[3m"

    # Reset sequence appended after styled text.
    END = "\033[0m"
def cleanup():
    """Delete the downloaded audio file and its temporary directory.

    Removes ``summarizer_audio/youtube.mp3`` and then the
    ``summarizer_audio`` directory. Either path may legitimately be absent
    (for example when the download failed before anything was written), so
    a missing file or directory is ignored instead of raising.

    Raises:
        OSError: if the directory exists but cannot be removed, e.g. because
            it still contains other files.
    """
    try:
        os.remove("summarizer_audio/youtube.mp3")
    except FileNotFoundError:
        # Nothing was downloaded, so there is no file to delete.
        pass
    try:
        os.rmdir("summarizer_audio")
    except FileNotFoundError:
        # The directory was never created.
        pass
def get_summary(video_url):
    """Download a YouTube video's audio, transcribe it, and print a summary.

    The audio-only stream is saved as ``summarizer_audio/youtube.mp3``,
    transcribed with the Whisper ``base`` model, and the transcript is
    summarized with the ``facebook/bart-large-cnn`` pipeline. Progress
    messages, the summary, and any error are printed to stdout; nothing is
    returned. ``cleanup()`` is always called afterwards.

    Args:
        video_url: URL of the YouTube video to summarize.
    """
    output_path = "summarizer_audio"
    filename = "youtube.mp3"
    # Build the path once so the download target and the transcription
    # input cannot drift apart.
    audio_path = os.path.join(output_path, filename)
    try:
        # Create a YouTube object from the URL
        print(Font.GREEN + "Transcribing video..." + Font.END)
        yt = YouTube(video_url)
        # Get the audio stream
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # Report a readable reason instead of an AttributeError on None.
            raise ValueError("no audio-only stream is available for this video")
        # Download the audio stream
        audio_stream.download(output_path=output_path, filename=filename)
        # Load the base model and transcribe the audio
        model = whisper.load_model("base")
        result = model.transcribe(audio_path, fp16=False)
        transcribed_text = result["text"]
        # Create summary of transcription
        print(Font.GREEN + "Summarizing transcription...\n" + Font.END)
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # Transcripts are frequently longer than the model's maximum input
        # length; truncate the input rather than letting the call fail.
        summary = summarizer(
            transcribed_text,
            max_length=100,
            min_length=50,
            truncation=True,
        )
        print("Summary:\n\n" + Font.ITALIC +
              summary[0]["summary_text"] + Font.END + "\n")
    except Exception as e:
        # Top-level boundary for the CLI: report the failure to the user.
        print(Font.RED + f"An error occurred: {e}" + Font.END)
    finally:
        cleanup()
def main(argv=None):
    """Parse command-line arguments and summarize the requested video.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads the arguments from ``sys.argv[1:]``.

    Raises:
        SystemExit: with status 1, after printing the help text, when no
            URL was supplied.
    """
    parser = argparse.ArgumentParser(
        description='Summarize YouTube videos.')
    parser.add_argument('-u', '--url', help='The url of the YouTube video')
    args = parser.parse_args(argv)
    if not args.url:
        parser.print_help()
        # Raise SystemExit directly; the builtin exit() is a helper injected
        # by the site module and is not guaranteed to exist in every runtime.
        raise SystemExit(1)
    get_summary(args.url)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment