Skip to content

Instantly share code, notes, and snippets.

Created January 13, 2022 23:34
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save baranbbr/7bbefe30cf7783fd392043d3e7e98fc5 to your computer and use it in GitHub Desktop.
Creates a Firefox browser instance, opens a YouTube video, and saves the video's transcript to a text file.
import time
from selenium import webdriver
from import By
from selenium.webdriver.common.keys import Keys
from import WebDriverWait
from import expected_conditions as EC
from bs4 import BeautifulSoup
def main():
# Drive a Firefox session to a video page, open its transcript panel, save
# the page HTML to "yt.html", and hand that file to get_transcript().
# NOTE(review): this block has lost its indentation and several statements
# (see the notes below); restore them from the original script before running.
driver = webdriver.Firefox()
# Explicit-wait helper bound to this driver with a timeout of 3 (seconds per
# the Selenium WebDriverWait API).
wait = WebDriverWait(driver, 3)
# Short aliases for the expected-condition factories. `presence` is not used
# in any line visible here.
presence = EC.presence_of_element_located
visible = EC.visibility_of_element_located
# load url with video
# NOTE(review): no page-load statement is visible here; presumably a
# driver.get(<video url>) call was lost - confirm.
# play the video
# Block until the element with id "video-title" is visible.
wait.until(visible((By.ID, "video-title")))
# NOTE(review): no click/play statement is visible after the wait - confirm.
# accepting cookies
# NOTE(review): the next line is only the tail of a statement; the call that
# opens this string (presumably an element lookup by CSS selector) is missing.
" > a:nth-child(1)").click()
# click 3 dots menu
# NOTE(review): same truncation as above - only the end of the selector string
# and the trailing .click() survive.
"ytd-menu-renderer.ytd-video-primary-info-renderer > yt-icon-button:nth-child(2) > button:nth-child(1)").click()
# click transcript
# NOTE(review): the statement that clicks the transcript entry is missing.
# wait for transcript panel to load
# NOTE(review): the corresponding wait statement is missing.
# save html source of page
# NOTE(review): the body of this `with` is missing; presumably it writes the
# driver's page source to the file - confirm.
with open("yt.html", 'w') as f:
# Parse the saved page and return whatever get_transcript() returns.
return get_transcript("yt.html")
def get_transcript(page):
    """Write the transcript cues found in a saved HTML page to transcript.txt.

    Reads the HTML file at *page*, selects the ``div`` elements carrying the
    class string ``cue style-scope ytd-transcript-body-renderer``, and writes
    the whitespace-stripped text of each one on its own line to
    ``transcript.txt`` in the current working directory. An existing
    ``transcript.txt`` is overwritten.

    Args:
        page: Path of the saved HTML file to parse.

    Returns:
        None. The transcript is delivered through the output file.

    Raises:
        OSError: If *page* cannot be read or ``transcript.txt`` cannot be
            written.
    """
    with open(page, 'r') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    cues = soup.find_all(
        'div', class_='cue style-scope ytd-transcript-body-renderer')

    # A context manager closes (and therefore flushes) the output file even
    # if writing fails part-way; the handle was previously never closed.
    with open('transcript.txt', 'w') as out:
        # Strip the padding whitespace that surrounds each cue's text.
        out.writelines(cue.text.strip() + '\n' for cue in cues)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment