Skip to content

Instantly share code, notes, and snippets.

@davidcairuz
Last active November 13, 2023 15:22
Show Gist options
  • Save davidcairuz/b187bf657e1b1174f712224c1794ce5b to your computer and use it in GitHub Desktop.
Save davidcairuz/b187bf657e1b1174f712224c1794ce5b to your computer and use it in GitHub Desktop.
Script that scrolls through an entire YouTube playlist and calculates its total duration. I'm still learning Python, so all of your suggestions will be very much appreciated.
from bs4 import BeautifulSoup as soup #used to beautifie the html code
import datetime as dt #sum the video's duration time
from selenium import webdriver #open webdriver for specific browser
from selenium.webdriver.common.keys import Keys #for necessary browser action
import time #used for sleep function
#line 63 must be modified for different languages
#the `url` variable (assigned just before the webdriver is started) must be modified for a different playlist
#module-level list of video timestamps (strings); the top-level script below appends to it and sums its entries
times = []
def get_html(): #snapshot of the page currently shown by the webdriver
    """Return the current page's <body> markup parsed with BeautifulSoup.

    The markup is read through the module-level ``driver`` by running a
    small JavaScript snippet in the browser, then handed to the
    'html.parser' backend.
    """
    body_markup = driver.execute_script("return document.body.innerHTML")
    return soup(body_markup, 'html.parser')
def _parse_video_count(label): #turns a label such as '1.234 videos' into the integer 1234
    """Extract the leading video count from the playlist sidebar label.

    Only the text before the first space is considered, and every
    character that is not a decimal digit (thousands separators such as
    '.' or ',') is dropped before conversion.

    Raises:
        ValueError: if the label does not start with a number.
    """
    #partition() keeps the whole string when there is no space, unlike
    #slicing with the -1 that str.find() returns, which would drop a digit
    leading = label.strip().partition(' ')[0]
    digits = ''.join(ch for ch in leading if ch.isdecimal())
    if not digits:
        raise ValueError('could not read a video count from ' + repr(label))
    return int(digits)


def end_of_page(): #used to scroll down to the bottom of the page
    """Scroll the playlist page down so the remaining videos get loaded.

    Reads the video count from the playlist sidebar of the page currently
    open in the module-level ``driver`` and presses END once per 100
    videos, plus one extra time, pausing two seconds after each press.

    Raises:
        IndexError: if the sidebar does not contain the video-count element.
        ValueError: if the video-count label does not start with a number.
    """
    page_soup = get_html() #gets html to find the number of videos in playlist
    info_strings = page_soup.findAll('yt-formatted-string', {'class':'style-scope ytd-playlist-sidebar-primary-info-renderer'})
    if len(info_strings) < 2: #the count is read from the second matching element
        raise IndexError('playlist sidebar did not contain a video count element')
    number_videos = _parse_video_count(info_strings[1].text)
    #presumably the page loads videos in batches of 100 -- TODO confirm
    times_scroll_down = number_videos // 100 + 1
    #find_element(by, value) works on Selenium 3 and 4; the
    #find_element_by_* helpers were removed in Selenium 4.3
    page_root = driver.find_element('tag name', 'html')
    for _ in range(times_scroll_down): #goes to the end of the playlist automatically
        page_root.send_keys(Keys.END)
        time.sleep(2) #pause before the next key press; presumably lets new videos load
url = 'https://www.youtube.com/playlist?list=PLOuZHgwKgiV_oXOKyG8iLzyMuSjxKHos0' #change the url to the desired playlist
driver = webdriver.Chrome() #initializes the webdriver
try:
    driver.get(url)
    end_of_page()
    page_soup = get_html() #gets the complete html, after scrolling down, with all the duration and title of videos
finally:
    driver.quit() #always close the browser, even when loading or scrolling failed

#everything below works on the parsed copy of the page, so the browser is no longer needed
time_containers = page_soup.findAll('div', {'class':'style-scope ytd-thumbnail'})
times = []
for container in time_containers: #create a list with timestamps
    #NOTE(review): the [7:] slice assumes seven leading characters precede the timestamp text -- confirm against the live markup
    stamp = container.text[7:].rstrip() #named `stamp` so the imported `time` module is not shadowed
    if not stamp: #thumbnails without any duration text are skipped
        continue
    if stamp.count(':') == 1: #make so the timestamp includes hours as 00 if its shorter than 1 hour
        stamp = '00:' + stamp
    times.append(stamp)
time_total = dt.timedelta() #running total of all video durations
for stamp in times:
    h, m, s = stamp.split(':')
    time_total += dt.timedelta(hours=int(h), minutes=int(m), seconds=int(s))
print ('\nPlaylist time: ' + str(time_total))
print ('\nNumber of avaiable videos: ' + str(len(times)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment