davidcairuz/youtube_playlist_time.py

## youtube_playlist_time.py
from bs4 import BeautifulSoup as soup #used to beautifie the html code
import datetime as dt #sum the video's duration time
from selenium import webdriver #open webdriver for specific browser
from selenium.webdriver.common.keys import Keys   #for necessary browser action
import time #used for sleep function

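#NOTE: the line numbers in the original notes ("line 63", "line 35") no longer match this file
#for a different interface language, the text parsing must be adapted (presumably the video counter and the timestamp slicing - confirm)
#for a different playlist, change the `url` variable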

#timestamps of the playlist videos as 'HH:MM:SS' strings, filled in by the script further down
times = []

def get_html(): #used to get the html code of the current page
	"""Return the current page body as a parsed BeautifulSoup tree.

	The markup is read from the module-level ``driver`` by running a
	JavaScript snippet that returns ``document.body.innerHTML``; it is then
	parsed with the 'html.parser' backend.
	"""
	body_markup = driver.execute_script("return document.body.innerHTML")
	return soup(body_markup, 'html.parser')

def _parse_video_count(label): #helper: turns the playlist counter text into an int
	"""Return the integer found in the first whitespace-separated token of *label*.

	Every non-digit character of that token is dropped, so both '1.234 videos'
	and '1,234 videos' give 1234, and a label without any space ('57') is
	parsed in full instead of losing its last character.

	Raises:
		ValueError: if *label* is blank or its first token contains no digit.
	"""
	tokens = label.split()
	if not tokens:
		raise ValueError('playlist video counter is empty')
	digits = ''.join(ch for ch in tokens[0] if ch.isdigit())
	if not digits:
		raise ValueError('no video count found in ' + repr(label))
	return int(digits)

def end_of_page(): #used to scroll down to the bottom of the page
	"""Press END on the page repeatedly to reach the bottom of the playlist.

	The number of videos is read from the second 'yt-formatted-string'
	element carrying the playlist sidebar info class. END is then sent
	``number_videos // 100 + 1`` times, with a two second pause after each
	key press.

	Uses the module-level ``driver``, ``Keys`` and ``get_html``.

	Raises:
		IndexError: if fewer than two matching sidebar elements are found.
		ValueError: if the counter text holds no number.
	"""
	#imported locally: the script below rebinds the global name `time`, and
	#`By` is not among the file-level imports
	from time import sleep
	from selenium.webdriver.common.by import By

	page_soup = get_html() #gets html to find the number of videos in playlist

	number_videos_container = page_soup.findAll('yt-formatted-string', {'class':'style-scope ytd-playlist-sidebar-primary-info-renderer'})
	number_videos = _parse_video_count(number_videos_container[1].text)

	#NOTE(review): presumably the page loads about 100 entries per scroll - confirm
	times_scroll_down = number_videos // 100 + 1

	for _ in range(times_scroll_down): #goes to the end of the playlist automatically
		#find_element(By...) replaces find_element_by_tag_name, which newer Selenium releases removed
		elm = driver.find_element(By.TAG_NAME, 'html')
		elm.send_keys(Keys.END)
		sleep(2) #pause after each key press before scrolling again

#main script: opens the playlist in Chrome, scrolls to the bottom, collects the
#duration of each video and prints the total time and the number of videos counted
url = 'https://www.youtube.com/playlist?list=PLOuZHgwKgiV_oXOKyG8iLzyMuSjxKHos0' #change the url to the desired playlist

#number of leading characters dropped from each thumbnail text before the timestamp
#NOTE(review): presumably depends on the page layout/language - confirm against the live page
TIMESTAMP_OFFSET = 7

driver = webdriver.Chrome() #initializes the webdriver
try:
	driver.get(url)
	end_of_page()
	page_soup = get_html() #gets the complete html, after scrolling down, with all the duration and title of videos
finally:
	driver.quit() #always close the browser, even when loading or scrolling failed

time_containers = page_soup.findAll('div', {'class':'style-scope ytd-thumbnail'})

for container in time_containers: #create a list with timestamps
	#named `stamp` so the imported `time` module is not overwritten
	stamp = container.text[TIMESTAMP_OFFSET:].rstrip()
	if not stamp: #containers without any duration text are not counted
		continue
	if stamp.count(':') == 1: #make so the timestamp includes hours as 00 if its shorter than 1 hour
		stamp = '00:' + stamp
	times.append(stamp)

time_total = dt.timedelta() #running total, starts at zero

for stamp in times:
	#raises ValueError if a collected text is not of the form H:M:S
	(h, m, s) = stamp.split(':')
	time_total += dt.timedelta(hours=int(h), minutes=int(m), seconds=int(s))

print('\nPlaylist time: ' + str(time_total))
print('\nNumber of avaiable videos: ' + str(len(times)))
	from bs4 import BeautifulSoup as soup #used to beautifie the html code
	import datetime as dt #sum the video's duration time
	from selenium import webdriver #open webdriver for specific browser
	from selenium.webdriver.common.keys import Keys #for necessary browser action
	import time #used for sleep function

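	#NOTE: the line numbers in the original notes ("line 63", "line 35") no longer match this file
	#for a different interface language, the text parsing must be adapted (presumably the video counter and the timestamp slicing - confirm)
	#for a different playlist, change the `url` variable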

	#timestamps of the playlist videos as 'HH:MM:SS' strings, filled in by the script further down
	times = []

	def get_html(): #used to get the html code of the current page
		"""Return the current page body as a parsed BeautifulSoup tree.

		The markup is read from ``driver`` by running a JavaScript snippet
		that returns ``document.body.innerHTML``; it is then parsed with the
		'html.parser' backend.
		"""
		body_markup = driver.execute_script("return document.body.innerHTML")
		return soup(body_markup, 'html.parser')

	def _parse_video_count(label): #helper: turns the playlist counter text into an int
		"""Return the integer found in the first whitespace-separated token of *label*.

		Every non-digit character of that token is dropped, so both
		'1.234 videos' and '1,234 videos' give 1234, and a label without any
		space ('57') is parsed in full instead of losing its last character.

		Raises:
			ValueError: if *label* is blank or its first token contains no digit.
		"""
		tokens = label.split()
		if not tokens:
			raise ValueError('playlist video counter is empty')
		digits = ''.join(ch for ch in tokens[0] if ch.isdigit())
		if not digits:
			raise ValueError('no video count found in ' + repr(label))
		return int(digits)

	def end_of_page(): #used to scroll down to the bottom of the page
		"""Press END on the page repeatedly to reach the bottom of the playlist.

		The number of videos is read from the second 'yt-formatted-string'
		element carrying the playlist sidebar info class. END is then sent
		``number_videos // 100 + 1`` times, with a two second pause after
		each key press.

		Uses ``driver``, ``Keys`` and ``get_html`` from the enclosing scope.

		Raises:
			IndexError: if fewer than two matching sidebar elements are found.
			ValueError: if the counter text holds no number.
		"""
		#imported locally: the script below rebinds the name `time`, and
		#`By` is not among the file-level imports
		from time import sleep
		from selenium.webdriver.common.by import By

		page_soup = get_html() #gets html to find the number of videos in playlist

		number_videos_container = page_soup.findAll('yt-formatted-string', {'class':'style-scope ytd-playlist-sidebar-primary-info-renderer'})
		number_videos = _parse_video_count(number_videos_container[1].text)

		#NOTE(review): presumably the page loads about 100 entries per scroll - confirm
		times_scroll_down = number_videos // 100 + 1

		for _ in range(times_scroll_down): #goes to the end of the playlist automatically
			#find_element(By...) replaces find_element_by_tag_name, which newer Selenium releases removed
			elm = driver.find_element(By.TAG_NAME, 'html')
			elm.send_keys(Keys.END)
			sleep(2) #pause after each key press before scrolling again

	#main script: opens the playlist in Chrome, scrolls to the bottom, collects the
	#duration of each video and prints the total time and the number of videos counted
	url = 'https://www.youtube.com/playlist?list=PLOuZHgwKgiV_oXOKyG8iLzyMuSjxKHos0' #change the url to the desired playlist

	#number of leading characters dropped from each thumbnail text before the timestamp
	#NOTE(review): presumably depends on the page layout/language - confirm against the live page
	TIMESTAMP_OFFSET = 7

	driver = webdriver.Chrome() #initializes the webdriver
	try:
		driver.get(url)
		end_of_page()
		page_soup = get_html() #gets the complete html, after scrolling down, with all the duration and title of videos
	finally:
		driver.quit() #always close the browser, even when loading or scrolling failed

	time_containers = page_soup.findAll('div', {'class':'style-scope ytd-thumbnail'})

	for container in time_containers: #create a list with timestamps
		#named `stamp` so the imported `time` module is not overwritten
		stamp = container.text[TIMESTAMP_OFFSET:].rstrip()
		if not stamp: #containers without any duration text are not counted
			continue
		if stamp.count(':') == 1: #make so the timestamp includes hours as 00 if its shorter than 1 hour
			stamp = '00:' + stamp
		times.append(stamp)

	time_total = dt.timedelta() #running total, starts at zero

	for stamp in times:
		#raises ValueError if a collected text is not of the form H:M:S
		(h, m, s) = stamp.split(':')
		time_total += dt.timedelta(hours=int(h), minutes=int(m), seconds=int(s))

	print('\nPlaylist time: ' + str(time_total))
	print('\nNumber of avaiable videos: ' + str(len(times)))