# Source: GitHub Gist by @steffahn (steffahn/31db3ae8ae4ec40fb9e77d8dd21eb4a8),
# created December 28, 2023 14:10.
# Dependencies: yt-dlp, youtube_transcript_api, colorama
# Install them using pip if not already installed:
# pip install yt-dlp youtube_transcript_api colorama
import os
import yt_dlp as youtube_dl
import shutil
from youtube_transcript_api import YouTubeTranscriptApi
from concurrent.futures import ThreadPoolExecutor
from colorama import Fore, Style
# Ask for the channel (or video) URL; strip surrounding whitespace so a
# pasted URL with stray spaces or a trailing newline is still usable.
channel_url = input("Enter the YouTube channel URL: ").strip()
# Function to fetch a single video's transcript
def fetch_transcript(video_data):
    """Fetch the transcript text for one video.

    Args:
        video_data: A ``(video_url, title)`` pair. The video id passed to
            the transcript API is whatever follows the last ``=`` in
            ``video_url``.

    Returns:
        A ``(title, video_url, transcript_text)`` tuple. ``transcript_text``
        is the transcript's ``'text'`` segments joined by newlines, or the
        placeholder ``'Transcript not available.'`` if fetching failed.
    """
    video_url, title = video_data
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_url.split('=')[-1])
        transcript_text = '\n'.join(segment['text'] for segment in transcript)
    except Exception:
        # Deliberately best-effort: any failure for one video degrades to a
        # placeholder instead of aborting the whole run. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        transcript_text = 'Transcript not available.'
    return title, video_url, transcript_text
# Function to get video URLs and titles from a channel
def get_channel_videos(channel_url):
    """Return ``(url, title)`` pairs for the videos behind ``channel_url``.

    Metadata is extracted with yt-dlp in quiet, flat mode and nothing is
    downloaded.

    Args:
        channel_url: URL handed to yt-dlp's ``extract_info``.

    Returns:
        A list of ``(url, title)`` tuples: one per entry when the result has
        an ``'entries'`` key, otherwise a single-element list describing the
        result itself.
    """
    ydl_opts = {'quiet': True, 'extract_flat': True}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(channel_url, download=False)
        if 'entries' in result:
            # Skip falsy placeholders so one unavailable entry cannot crash
            # the listing with a TypeError on subscripting None.
            return [
                (entry['url'], entry['title'])
                for entry in result['entries']
                if entry
            ]
        # For a single (non-playlist) result, 'webpage_url' is the watch-page
        # URL; 'url' there is the media stream URL or may be absent. Fall
        # back to 'url' to stay compatible with results lacking 'webpage_url'.
        return [(result.get('webpage_url') or result['url'], result['title'])]
# Function to find the next available script file name
def find_next_script_file():
    """Return the first unused ``script<N>.txt`` name, counting N up from 1.

    The names are relative paths, so existence is checked against the
    current working directory.
    """
    index = 0
    while True:
        index += 1
        candidate = f'script{index}.txt'
        if not os.path.exists(candidate):
            return candidate
# Collect the (url, title) pairs for everything behind the entered URL
videos = get_channel_videos(channel_url)
# Tell the user how many videos were found before the slow part starts
found_message = f'Found {len(videos)} videos. Downloading transcripts...'
print(found_message)
# Choose the output file: the first script<N>.txt that does not exist yet
script_file = find_next_script_file()
# Function to update and display the progress bar
def display_progress_bar(total, current):
    """Redraw a colored, single-line progress bar on the terminal.

    Args:
        total: Number of items expected overall.
        current: Number of items completed so far.
    """
    terminal_width = shutil.get_terminal_size((80, 20)).columns
    bar_length = terminal_width - 30  # Adjust for the rest of the output
    if total > 0:
        progress = int(bar_length * current / total)
    else:
        # Nothing to do counts as complete; also avoids dividing by zero.
        progress = bar_length
    # Keep the filled part within the bar even for out-of-range counts or a
    # terminal narrower than the reserved 30 columns.
    progress = max(0, min(progress, bar_length))
    bar = (
        f"{Fore.GREEN}[{Fore.BLUE}{'#' * progress}"
        f"{Fore.GREEN}{'-' * (bar_length - progress)}"
        f"{Fore.GREEN}]{Style.RESET_ALL}"
    )
    # '\r' moves back to the start of the line so the bar overwrites itself.
    # No newline is written, so flush explicitly; otherwise buffered stdout
    # may hold the update back and the bar would not redraw live.
    print(f'\r{bar} {current}/{total} transcripts', end='', flush=True)
# Separator bar written after each transcript in the output file
separator = '=' * 40
# Start writing to the file as each transcript is fetched
total_videos = len(videos)
completed_videos = 0
# Transcripts are fetched by up to 6 worker threads; executor.map yields the
# results in the same order as `videos`, so the file keeps that order.
with ThreadPoolExecutor(max_workers=6) as executor:
    # Explicit UTF-8: titles and transcripts often contain non-ASCII text,
    # which the platform default encoding may be unable to write.
    with open(script_file, 'w', encoding='utf-8') as file:
        for title, video_url, transcript_text in executor.map(fetch_transcript, videos):
            file.write(f'{title}\n{video_url}\n\n{transcript_text}\n\n{separator}\n\n')
            completed_videos += 1
            display_progress_bar(total_videos, completed_videos)
# Final output (leading newline ends the in-place progress bar line)
print(f'\n{Fore.CYAN}Transcripts saved to {script_file}{Style.RESET_ALL}')
# (Scraped page footer: sign in to GitHub to comment on this gist.)