# baranbbr/get_transcript.py

## get_transcript.py
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from bs4 import BeautifulSoup


def main():
    """Open a YouTube video in Firefox and save its transcript.

    Drives the browser through the cookie prompt, the three-dots menu and
    the transcript menu entry, writes the resulting page source to
    ``yt.html`` and passes that file to ``get_transcript``.

    Returns:
        Whatever ``get_transcript("yt.html")`` returns.
    """
    driver = webdriver.Firefox()
    # try/finally so the browser session is shut down even if a step fails.
    try:
        driver.maximize_window()

        wait = WebDriverWait(driver, 3)
        visible = EC.visibility_of_element_located
        clickable = EC.element_to_be_clickable

        def click(css_selector):
            # Locate via By.CSS_SELECTOR (the find_element_by_* helpers were
            # removed in Selenium 4.3) and wait until the element can be
            # clicked instead of clicking immediately.
            wait.until(clickable((By.CSS_SELECTOR, css_selector))).click()

        # load url with video
        driver.get("https://www.youtube.com/watch?v=bWPMSSsVdPk")

        # wait until the video title is visible
        wait.until(visible((By.ID, "video-title")))
        # accepting cookies
        click("ytd-button-renderer.style-scope:nth-child(2) > a:nth-child(1)")
        # click 3 dots menu
        click(
            "ytd-menu-renderer.ytd-video-primary-info-renderer > yt-icon-button:nth-child(2) > button:nth-child(1)")
        # click transcript
        click("tp-yt-paper-item.ytd-menu-service-item-renderer")
        # wait for transcript panel to load
        wait.until(visible(
            (By.CSS_SELECTOR, "ytd-engagement-panel-section-list-renderer.style-scope")))
        # save html source of page
        with open("yt.html", 'w') as f:
            f.write(driver.page_source)
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()
    return get_transcript("yt.html")


def get_transcript(page):
    """Extract transcript cue text from a saved page and write it to disk.

    Reads the HTML file at ``page``, finds the ``div`` elements selected by
    the class string ``cue style-scope ytd-transcript-body-renderer`` and
    writes the stripped text of each one, one per line, to
    ``transcript.txt``.

    Args:
        page: Path of the HTML file to parse.
    """
    with open(page, 'r') as source:
        html = source.read()
    soup = BeautifulSoup(html, 'html.parser')
    cues = soup.find_all(
        'div', class_='cue style-scope ytd-transcript-body-renderer')
    # The context manager closes the output file even if a write fails.
    with open('transcript.txt', 'w') as out:
        # strip() removes the whitespace surrounding each cue's text
        out.writelines(cue.text.strip() + '\n' for cue in cues)


# python running code
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script.
    main()