Skip to content

Instantly share code, notes, and snippets.

@kingosticks
Created May 23, 2018 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingosticks/5f370f1f0fc1170e22ecb1149216621a to your computer and use it in GitHub Desktop.
Save kingosticks/5f370f1f0fc1170e22ecb1149216621a to your computer and use it in GitHub Desktop.
Parses YouTube's mobile search results page
#!/usr/bin/python
from __future__ import unicode_literals
from lxml import html
import requests
import sys
# Endpoint of YouTube's mobile search page; the query is sent as the
# ``search_query`` parameter.
SEARCH_URL = 'https://m.youtube.com/results'
# Query used when no search term is given on the command line.
DEFAULT_QUERY = 'Iron Maiden'
# Seconds to wait for the HTTP response before giving up, so the script
# cannot hang forever on a stalled connection.
REQUEST_TIMEOUT = 10


def to_ascii(text):
    """Return *text* as a text string with all non-ASCII characters dropped.

    ``None`` (e.g. a missing HTML attribute) is treated as an empty string.
    The encode/decode round trip keeps the result a text string on both
    Python 2 and Python 3 instead of leaking ``bytes`` into the output.
    """
    return (text or '').encode('ascii', 'ignore').decode('ascii')


def track_type(uri):
    """Classify a result URI as ``'Radio'``, ``'Playlist'`` or ``'Track'``.

    The radio check comes first, so a URI carrying both ``&start_radio=``
    and ``&list=`` is reported as a radio.
    """
    if '&start_radio=' in uri:
        return 'Radio'
    if '&list=' in uri:
        return 'Playlist'
    return 'Track'


def parse_results(tree):
    """Extract search results from a parsed results page.

    *tree* is the lxml element returned by ``html.fromstring``. Every
    ``div`` whose class contains ``yt-lockup-content`` is inspected; the
    first-level children with class ``yt-lockup-title`` and
    ``yt-lockup-byline`` supply the title/URI and the artist respectively.

    Returns a list of dicts with the keys ``'title'``, ``'uri'`` and,
    when a byline link exists, ``'artist'``.
    """
    results = []
    for result_div in tree.xpath('//div[contains(@class, "yt-lockup-content")]'):
        item = {}
        for elem in result_div:
            # Children without a class attribute are simply not a match.
            html_class = (elem.get('class') or '').strip()
            if html_class == 'yt-lockup-title':
                for link in elem:
                    if link.tag == 'a':
                        item['title'] = to_ascii(link.get('title'))
                        item['uri'] = link.get('href')
            elif html_class == 'yt-lockup-byline':
                for link in elem:
                    if link.tag == 'a':
                        item['artist'] = to_ascii(link.text_content())
        # Try and filter the adverts: keep only entries linking to /watch.
        # Entries that never yielded a link have no URI and are skipped.
        if '/watch' in (item.get('uri') or ''):
            results.append(item)
    return results


def format_result(index, item):
    """Format one result dict as a fixed-width output line.

    The artist is cut to 20 characters and the title to 50; a missing
    artist or title is rendered as an empty column.
    """
    uri = item.get('uri') or ''
    return '[%2d] %-8s : %-20s : %-50s : %s' % (
        index,
        track_type(uri),
        item.get('artist', '')[:20],
        item.get('title', '')[:50],
        uri,
    )


def main(argv=None):
    """Search YouTube for ``argv[1]`` and print one line per result.

    *argv* defaults to ``sys.argv``; without a search term
    ``DEFAULT_QUERY`` is used. Exits with status 1 (and a message on
    stderr) when the server does not answer with HTTP 200.
    """
    argv = sys.argv if argv is None else argv
    query = argv[1] if len(argv) > 1 else DEFAULT_QUERY

    # Let requests build the query string so spaces, '&', '#', etc. in the
    # search term are percent-encoded instead of corrupting the URL.
    page = requests.get(
        SEARCH_URL,
        params={'search_query': query},
        timeout=REQUEST_TIMEOUT,
    )
    if page.status_code != 200:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('YouTube search failed with HTTP status %d' % page.status_code)

    tree = html.fromstring(page.content)
    for index, item in enumerate(parse_results(tree)):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(format_result(index, item))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment