Last active
February 3, 2024 23:45
-
-
Save RoseSecurity/7c4f03a85aa25bad9ef10b8ce45f5bae to your computer and use it in GitHub Desktop.
Don't have time to watch tutorials and technical videos? Need to quickly identify useful information? This script harnesses machine learning to summarize YouTube videos.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import os | |
import argparse | |
import whisper | |
from pytube import YouTube | |
from transformers import pipeline | |
# Harness machine learning to summarize YouTube video transcriptions.
# Usage: python3 summarizer.py -u <URL>
class Font:
    """ANSI escape sequences used to style the script's terminal output."""

    # Terminates any styling started by one of the codes below.
    END = "\033[0m"

    # Text style.
    ITALIC = "\033[3m"

    # Foreground colours.
    RED = "\033[91m"
    GREEN = "\033[92m"
def cleanup():
    """Remove the downloaded audio file and its directory, if they exist.

    This is best-effort: it is invoked from a ``finally`` clause, so it can
    run before anything was downloaded. A missing file or directory must not
    raise and bury the error that was just reported to the user.
    """
    try:
        os.remove("summarizer_audio/youtube.mp3")
    except FileNotFoundError:
        # Nothing was downloaded (the failure happened earlier).
        pass

    try:
        os.rmdir("summarizer_audio")
    except OSError:
        # Directory is absent, or it holds files this script did not create;
        # either way leave it alone rather than crash during cleanup.
        pass
def get_summary(video_url):
    """Download a video's audio, transcribe it, and print a short summary.

    The audio stream is saved under ``summarizer_audio/youtube.mp3``,
    transcribed with the Whisper ``base`` model, and summarized with the
    ``facebook/bart-large-cnn`` summarization pipeline. Progress messages,
    the summary, and any error are printed; nothing is returned.
    ``cleanup()`` always runs afterwards to delete the downloaded audio.

    Args:
        video_url: URL of the YouTube video to summarize.
    """
    output_path = "summarizer_audio"
    filename = "youtube.mp3"
    # Single source of truth for where the download lands and is read from.
    audio_path = os.path.join(output_path, filename)

    try:
        print(Font.GREEN + "Transcribing video..." + Font.END)

        # Pick the first audio-only stream of the video.
        yt = YouTube(video_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise RuntimeError("no audio-only stream is available for this video")
        audio_stream.download(output_path=output_path, filename=filename)

        # Load the base model and transcribe the downloaded audio.
        # NOTE(review): fp16=False is kept from the original; presumably it
        # avoids half-precision on CPU-only machines -- confirm.
        model = whisper.load_model("base")
        result = model.transcribe(audio_path, fp16=False)
        transcribed_text = result["text"]

        print(Font.GREEN + "Summarizing transcription...\n" + Font.END)
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # truncation=True: the model has a bounded input length, and a long
        # transcript would otherwise fail inside the pipeline. The trade-off
        # is that only the leading part of a long transcript is summarized.
        summary = summarizer(
            transcribed_text,
            max_length=100,
            min_length=50,
            truncation=True,
        )
        print("Summary:\n\n" + Font.ITALIC +
              summary[0]["summary_text"] + Font.END + "\n")
    except Exception as e:
        # Top-level boundary for this CLI: report the failure to the user.
        print(Font.RED + f"An error occurred: {e}" + Font.END)
    finally:
        cleanup()
def main(argv=None):
    """Parse command-line arguments and summarize the requested video.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, so existing ``main()`` callers
            behave as before.

    Raises:
        SystemExit: With status 1, after printing the help text, when no
            URL was supplied.
    """
    parser = argparse.ArgumentParser(
        description='Summarize YouTube videos.')
    parser.add_argument('-u', '--url', help='The url of the YouTube video')
    args = parser.parse_args(argv)

    if not args.url:
        parser.print_help()
        # Raise SystemExit directly: the bare ``exit()`` helper is injected
        # by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    get_summary(args.url)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment