# steffahn/transcripts.py (secret gist)

## transcripts.py
# Dependencies: yt-dlp, youtube_transcript_api, colorama
# Install them using pip if not already installed:
# pip install yt-dlp youtube_transcript_api colorama

import os
import yt_dlp as youtube_dl
import shutil
from youtube_transcript_api import YouTubeTranscriptApi
from concurrent.futures import ThreadPoolExecutor
from colorama import Fore, Style

# Prompt for the URL; strip surrounding whitespace that often comes along
# with a pasted link so it is not passed on to the extractor.
channel_url = input("Enter the YouTube channel URL: ").strip()

# Function to fetch a single video's transcript
def fetch_transcript(video_data):
    """Fetch the transcript text for a single video.

    Args:
        video_data: A ``(video_url, title)`` pair.

    Returns:
        A ``(title, video_url, transcript_text)`` tuple. ``transcript_text``
        is the ``'text'`` field of every transcript item joined with
        newlines, or the literal ``'Transcript not available.'`` when the
        lookup fails for any reason. This function never raises for a
        failed lookup.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import parse_qs, urlparse

    video_url, title = video_data
    try:
        parsed = urlparse(video_url)
        # Prefer the explicit ``v`` query parameter so extra parameters
        # (``&t=...``, ``&list=...``) cannot be mistaken for the video ID.
        query_ids = parse_qs(parsed.query).get('v')
        if query_ids:
            video_id = query_ids[0]
        else:
            # No ``v`` parameter (e.g. ``/shorts/<id>``, ``youtu.be/<id>``,
            # or a bare ID): use the last path segment.
            video_id = parsed.path.rstrip('/').rsplit('/', 1)[-1] or video_url
        # NOTE(review): newer youtube_transcript_api releases appear to have
        # replaced ``get_transcript`` with an instance ``fetch`` method;
        # confirm against the installed version, since a missing attribute
        # would be reported here as "not available".
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = '\n'.join(segment['text'] for segment in transcript)
    except Exception:
        # Best effort: any lookup failure yields the placeholder text.
        # ``Exception`` (not a bare ``except``) lets Ctrl-C and SystemExit
        # propagate instead of being silently swallowed.
        transcript_text = 'Transcript not available.'
    return title, video_url, transcript_text

# Function to get video URLs and titles from a channel
def get_channel_videos(channel_url):
    """Return ``(url, title)`` pairs for the videos behind *channel_url*.

    yt-dlp is run quietly with ``extract_flat`` enabled and
    ``download=False``, so only listing metadata is requested.

    Args:
        channel_url: URL passed to ``YoutubeDL.extract_info``.

    Returns:
        A list of ``(url, title)`` tuples. When the extraction result has an
        ``'entries'`` key, one tuple is produced per usable entry; otherwise
        a single-element list describing the result itself. An empty list is
        returned when nothing was extracted.
    """
    ydl_opts = {'quiet': True, 'extract_flat': True}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(channel_url, download=False)

    if not result:
        return []

    if 'entries' in result:
        # Skip placeholder (None) entries and entries without a URL rather
        # than crashing the whole run; fall back to the URL when an entry
        # carries no title.
        return [
            (entry['url'], entry.get('title') or entry['url'])
            for entry in result['entries']
            if entry and entry.get('url')
        ]

    # Single item rather than a listing.
    # NOTE(review): for a plain video result, 'url' may be absent or refer to
    # a media stream; 'webpage_url' is presumably the watch-page address --
    # confirm against the yt-dlp info-dict documentation.
    single_url = result.get('webpage_url') or result['url']
    return [(single_url, result.get('title') or single_url)]

# Function to find the next available script file name
def find_next_script_file():
    """Return the first unused ``script<N>.txt`` name, counting N up from 1.

    Existence is checked with a relative path, i.e. against the current
    working directory.
    """
    index = 1
    while True:
        candidate = f'script{index}.txt'
        if not os.path.exists(candidate):
            return candidate
        index += 1

# Resolve the entered URL into a list of (url, title) pairs.
videos = get_channel_videos(channel_url)

# Choose the output file name (first unused script<N>.txt).
script_file = find_next_script_file()

# Tell the user how many videos are queued before the downloads begin.
print(f'Found {len(videos)} videos. Downloading transcripts...')

# Function to update and display the progress bar
def display_progress_bar(total, current):
    """Redraw a one-line colored progress bar on stdout.

    The line starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other.

    Args:
        total: Number of transcripts expected overall. Nothing is drawn when
            this is zero or negative.
        current: Number of transcripts completed so far.
    """
    if total <= 0:
        # Nothing to measure progress against; avoids ZeroDivisionError.
        return
    terminal_width = shutil.get_terminal_size((80, 20)).columns
    # Leave 30 columns for the brackets and the "current/total" text;
    # never let the bar width go negative on very narrow terminals.
    bar_length = max(terminal_width - 30, 0)
    # Clamp so an out-of-range ``current`` cannot over- or under-fill the bar.
    progress = min(bar_length, max(0, int(bar_length * current / total)))
    bar = f"{Fore.GREEN}[{Fore.BLUE}{'#' * progress}{Fore.GREEN}{'-' * (bar_length - progress)}{Fore.GREEN}]{Style.RESET_ALL}"
    # flush=True: the line has no newline, so a line-buffered stdout would
    # otherwise hold it back and the bar would not visibly update.
    print(f'\r{bar} {current}/{total} transcripts', end='', flush=True)

# Line written between consecutive transcripts in the output file.
separator = '=' * 40

# Progress counters for the bar.
total_videos = len(videos)
completed_videos = 0

# Fetch transcripts on up to 6 worker threads. executor.map yields results
# in the same order as `videos`, so the file follows the listing order.
with ThreadPoolExecutor(max_workers=6) as executor:
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent characters found in titles or transcripts.
    with open(script_file, 'w', encoding='utf-8') as file:
        results = executor.map(fetch_transcript, videos)
        for completed_videos, (title, video_url, transcript_text) in enumerate(results, start=1):
            file.write(f'{title}\n{video_url}\n\n{transcript_text}\n\n{separator}\n\n')
            display_progress_bar(total_videos, completed_videos)

# Final output (leading newline moves past the progress-bar line).
print(f'\n{Fore.CYAN}Transcripts saved to {script_file}{Style.RESET_ALL}')
	# Dependencies: yt-dlp, youtube_transcript_api, colorama
	# Install them using pip if not already installed:
	# pip install yt-dlp youtube_transcript_api colorama

	import os
	import yt_dlp as youtube_dl
	import shutil
	from youtube_transcript_api import YouTubeTranscriptApi
	from concurrent.futures import ThreadPoolExecutor
	from colorama import Fore, Style

	# Prompt for the URL; strip surrounding whitespace that often comes along
	# with a pasted link so it is not passed on to the extractor.
	channel_url = input("Enter the YouTube channel URL: ").strip()

	# Function to fetch a single video's transcript
	def fetch_transcript(video_data):
	    """Fetch the transcript text for a single video.

	    Args:
	        video_data: A ``(video_url, title)`` pair.

	    Returns:
	        A ``(title, video_url, transcript_text)`` tuple. ``transcript_text``
	        is the ``'text'`` field of every transcript item joined with
	        newlines, or the literal ``'Transcript not available.'`` when the
	        lookup fails for any reason. This function never raises for a
	        failed lookup.
	    """
	    # Local import keeps this block self-contained (stdlib only).
	    from urllib.parse import parse_qs, urlparse

	    video_url, title = video_data
	    try:
	        parsed = urlparse(video_url)
	        # Prefer the explicit ``v`` query parameter so extra parameters
	        # (``&t=...``, ``&list=...``) cannot be mistaken for the video ID.
	        query_ids = parse_qs(parsed.query).get('v')
	        if query_ids:
	            video_id = query_ids[0]
	        else:
	            # No ``v`` parameter (e.g. ``/shorts/<id>``, ``youtu.be/<id>``,
	            # or a bare ID): use the last path segment.
	            video_id = parsed.path.rstrip('/').rsplit('/', 1)[-1] or video_url
	        # NOTE(review): newer youtube_transcript_api releases appear to have
	        # replaced ``get_transcript`` with an instance ``fetch`` method;
	        # confirm against the installed version.
	        transcript = YouTubeTranscriptApi.get_transcript(video_id)
	        transcript_text = '\n'.join(segment['text'] for segment in transcript)
	    except Exception:
	        # Best effort: any lookup failure yields the placeholder text.
	        # ``Exception`` (not a bare ``except``) lets Ctrl-C and SystemExit
	        # propagate instead of being silently swallowed.
	        transcript_text = 'Transcript not available.'
	    return title, video_url, transcript_text

	# Function to get video URLs and titles from a channel
	def get_channel_videos(channel_url):
	    """Return ``(url, title)`` pairs for the videos behind *channel_url*.

	    yt-dlp is run quietly with ``extract_flat`` enabled and
	    ``download=False``, so only listing metadata is requested.

	    Args:
	        channel_url: URL passed to ``YoutubeDL.extract_info``.

	    Returns:
	        A list of ``(url, title)`` tuples. When the extraction result has
	        an ``'entries'`` key, one tuple is produced per usable entry;
	        otherwise a single-element list describing the result itself. An
	        empty list is returned when nothing was extracted.
	    """
	    ydl_opts = {'quiet': True, 'extract_flat': True}
	    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
	        result = ydl.extract_info(channel_url, download=False)

	    if not result:
	        return []

	    if 'entries' in result:
	        # Skip placeholder (None) entries and entries without a URL rather
	        # than crashing the whole run; fall back to the URL when an entry
	        # carries no title.
	        return [
	            (entry['url'], entry.get('title') or entry['url'])
	            for entry in result['entries']
	            if entry and entry.get('url')
	        ]

	    # Single item rather than a listing.
	    # NOTE(review): 'webpage_url' is presumably the watch-page address for
	    # a plain video result -- confirm against the yt-dlp documentation.
	    single_url = result.get('webpage_url') or result['url']
	    return [(single_url, result.get('title') or single_url)]

	# Function to find the next available script file name
	def find_next_script_file():
	    """Return the first unused ``script<N>.txt`` name, counting N up from 1.

	    Existence is checked with a relative path, i.e. against the current
	    working directory.
	    """
	    index = 1
	    while True:
	        candidate = f'script{index}.txt'
	        if not os.path.exists(candidate):
	            return candidate
	        index += 1

	# Resolve the entered URL into a list of (url, title) pairs.
	videos = get_channel_videos(channel_url)

	# Choose the output file name (first unused script<N>.txt).
	script_file = find_next_script_file()

	# Tell the user how many videos are queued before the downloads begin.
	print(f'Found {len(videos)} videos. Downloading transcripts...')

	# Function to update and display the progress bar
	def display_progress_bar(total, current):
	    """Redraw a one-line colored progress bar on stdout.

	    The line starts with a carriage return and has no trailing newline, so
	    successive calls overwrite each other.

	    Args:
	        total: Number of transcripts expected overall. Nothing is drawn
	            when this is zero or negative.
	        current: Number of transcripts completed so far.
	    """
	    if total <= 0:
	        # Nothing to measure progress against; avoids ZeroDivisionError.
	        return
	    terminal_width = shutil.get_terminal_size((80, 20)).columns
	    # Leave 30 columns for the brackets and the "current/total" text;
	    # never let the bar width go negative on very narrow terminals.
	    bar_length = max(terminal_width - 30, 0)
	    # Clamp so an out-of-range ``current`` cannot over- or under-fill the bar.
	    progress = min(bar_length, max(0, int(bar_length * current / total)))
	    bar = f"{Fore.GREEN}[{Fore.BLUE}{'#' * progress}{Fore.GREEN}{'-' * (bar_length - progress)}{Fore.GREEN}]{Style.RESET_ALL}"
	    # flush=True: the line has no newline, so a line-buffered stdout would
	    # otherwise hold it back and the bar would not visibly update.
	    print(f'\r{bar} {current}/{total} transcripts', end='', flush=True)

	# Line written between consecutive transcripts in the output file.
	separator = '=' * 40

	# Progress counters for the bar.
	total_videos = len(videos)
	completed_videos = 0

	# Fetch transcripts on up to 6 worker threads. executor.map yields results
	# in the same order as `videos`, so the file follows the listing order.
	with ThreadPoolExecutor(max_workers=6) as executor:
	    # Explicit UTF-8: the platform default encoding may be unable to
	    # represent characters found in titles or transcripts.
	    with open(script_file, 'w', encoding='utf-8') as file:
	        results = executor.map(fetch_transcript, videos)
	        for completed_videos, (title, video_url, transcript_text) in enumerate(results, start=1):
	            file.write(f'{title}\n{video_url}\n\n{transcript_text}\n\n{separator}\n\n')
	            display_progress_bar(total_videos, completed_videos)

	# Final output (leading newline moves past the progress-bar line).
	print(f'\n{Fore.CYAN}Transcripts saved to {script_file}{Style.RESET_ALL}')