Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
An example of how to download metadata for all videos of a specified YouTube channel using simple BeautifulSoup screen scraping. The data will be much more up-to-date and reliable than the RSS feed or YouTube API results (which is a sad state of affairs really).
import urllib
import json
from bs4 import BeautifulSoup
from collections import namedtuple
# Immutable record describing one scraped video.
Video = namedtuple(
    "Video",
    ["video_id", "title", "duration", "views", "thumbnail"],
)
def parse_video_div(div):
    """Build a Video record from one video <div> of a channel listing.

    ``div`` is queried with ``get()`` and ``find()``, i.e. it is expected to
    be a BeautifulSoup tag. The title, duration and view-count elements are
    required: if one of them is missing, ``find()`` returns None and the
    following attribute access raises AttributeError.

    Returns:
        Video(video_id, title, duration, views, thumbnail), where ``views``
        is an int and ``thumbnail`` is "" when no usable <img> is present.

    Raises:
        ValueError: if the view-count text does not start with a number.
    """
    video_id = div.get("data-context-item-id", "")
    title = div.find("a", "yt-uix-tile-link").text
    duration = div.find("span", "video-time").contents[0].text

    meta_text = div.find("ul", "yt-lockup-meta-info").contents[0].text
    # Take the leading token as the count. rstrip(" views") strips a *set of
    # characters*, not a suffix, and fails on surrounding whitespace.
    tokens = meta_text.split()
    count_text = tokens[0].replace(",", "") if tokens else ""
    # NOTE(review): "No views" is presumably how a zero count is rendered --
    # confirm against live markup.
    views = 0 if count_text.lower() == "no" else int(count_text)

    img = div.find("img")
    src = img.get("src", "") if img else ""
    # Only protocol-relative URLs ("//host/path") need a scheme; prefixing an
    # empty or already-absolute src would produce a malformed URL.
    thumbnail = "http:" + src if src.startswith("//") else src

    return Video(video_id, title, duration, views, thumbnail)
def parse_videos_page(page):
    """Return a list of Video records, one per "yt-lockup-video" <div> in page.

    Each matching <div> is handed to parse_video_div; document order is kept.
    """
    return list(map(parse_video_div, page.find_all("div", "yt-lockup-video")))
def find_load_more_url(page):
    """Return the absolute URL of the next batch of videos, or None.

    Looks at every <button> in ``page`` and uses the first non-empty
    "data-uix-load-more-href" attribute, prefixed with the YouTube host.
    Returns None when no button carries that attribute.
    """
    hrefs = (
        button.get("data-uix-load-more-href")
        for button in page.find_all("button")
    )
    relative_url = next((href for href in hrefs if href), None)
    if relative_url is None:
        return None
    return "http://www.youtube.com" + relative_url
def download_page(url):
    """Fetch ``url`` and return the raw response body.

    Prints a "Downloading ..." progress line before the request is made.
    The response object is always closed, even if reading fails.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2;
    # importing locally keeps this function working on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    print("Downloading {0}".format(url))
    response = urlopen(url)
    # try/finally rather than ``with``: the Python 2 response object is not a
    # context manager.
    try:
        return response.read()
    finally:
        response.close()
def get_videos(username):
    """Collect Video records for every video listed on a user's channel.

    Downloads the channel's /videos page, parses the videos on it, then keeps
    following the "load more" URL until none is found. Each follow-up
    response is JSON; its "content_html" field holds the next batch of video
    markup and "load_more_widget_html" holds the button pointing at the batch
    after that.

    Returns:
        A list of Video records in the order they were encountered.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    parser = "html.parser"

    page_url = "http://www.youtube.com/user/{0}/videos".format(username)
    page = BeautifulSoup(download_page(page_url), parser)
    videos = parse_videos_page(page)
    page_url = find_load_more_url(page)

    while page_url:
        json_data = json.loads(download_page(page_url))
        # ``or ""`` also covers a JSON null, which .get()'s default would not.
        content = BeautifulSoup(json_data.get("content_html") or "", parser)
        videos.extend(parse_videos_page(content))
        widget = BeautifulSoup(
            json_data.get("load_more_widget_html") or "", parser
        )
        page_url = find_load_more_url(widget)

    return videos
# Script entry point: dump every video of one channel, then the total count.
if __name__ == "__main__":
    channel_videos = get_videos("jimmydiresta")
    for entry in channel_videos:
        print(entry)
    total = len(channel_videos)
    print("{0} videos".format(total))

Thanks for this - so useful. However, I can't seem to get it to do anything. When I run the script it appears to work, but no data is downloaded and it reports 0 videos. Any advice?

Owner

shaurz commented Apr 24, 2015

I think YouTube has changed something so this code doesn't work any more.

Owner

shaurz commented Apr 24, 2015

Updated to work with latest YouTube HTML output. Not as neat as it used to be.

Hello, really useful script. Can you help me, please? I need to make the same script for a YouTube playlist. Any suggestions?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment