Skip to content

Instantly share code, notes, and snippets.

@magical
Last active December 12, 2015 07:28
Show Gist options
  • Save magical/4736241 to your computer and use it in GitHub Desktop.
Save magical/4736241 to your computer and use it in GitHub Desktop.
screen.yahoo.com video grabber
# Print the direct video URL for a screen.yahoo.com page.
# Usage: python3 yahoo.py 'http://screen.yahoo.com/raven-refused-sing-010647927.html' | xargs vlc
import sys
import contextlib
import json
import random
import re
import urllib.parse
import urllib.request
# Host and path of the YQL endpoint that main() queries for stream info.
API_URL = "video.query.yahoo.com"
API_PATH = "/v1/public/yql"

# YQL statement template; the {placeholders} are filled in with str.format()
# from the parameters extracted from the video player URL.
QUERY = (
    'SELECT * FROM yahoo.media.video.streams'
    ' WHERE id="{id}" AND format="{format}"'
    ' AND protocol="{protocol}" AND plrs="{player_guid}"'
    ' AND offnetwork="{onet}"'
    ' AND site="{site}" AND lang="{lang}"'
    ' AND region="{region}" AND override="";'
)
def create_guid(length=22):
    """Return a random identifier string.

    Each character is drawn independently with ``random.choice`` from the
    ASCII letters, the digits, ``.`` and ``_``.  The ``random`` module is not
    a cryptographic source, so the result is not suitable as a secret.

    Args:
        length: Number of characters to generate.  Defaults to 22, the
            length this function has always produced.

    Returns:
        A string of ``length`` characters (empty if ``length`` is <= 0).
    """
    chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._"
    # join() avoids rebuilding the string on every appended character.
    return "".join(random.choice(chars) for _ in range(length))
def fatal(s):
    """Report an error and terminate the program.

    Prints ``s`` to standard error, then raises ``SystemExit`` with exit
    status 1, so this function never returns normally.
    """
    print(s, file=sys.stderr)
    raise SystemExit(1)
def urlopen(url):
    """Open ``url`` and return a context manager that closes it on exit.

    The object bound by ``with urlopen(url) as f`` is the response returned
    by ``urllib.request.urlopen``; its ``close()`` is called when the
    ``with`` block ends.
    """
    response = urllib.request.urlopen(url)
    return contextlib.closing(response)
def main():
    """Print the video URL for the Yahoo! Screen page named on the command line.

    Steps:
      1. Read the page URL from ``sys.argv[1]`` and check its prefix.
      2. Fetch the first 4096 bytes of the page and extract the
         ``og:video`` player URL from its ``<meta>`` tag.
      3. Build a YQL query from the player URL's query-string parameters
         and fetch the JSON stream description.
      4. Print ``host + path`` of the first stream of the first media object.

    Any failure to find the expected data is reported through ``fatal``,
    which prints a message to stderr and exits with status 1.
    """
    if len(sys.argv) < 2:
        fatal("Usage: yahoo.py <screen.yahoo.com video url>")
    page_url = sys.argv[1]
    if not page_url.startswith("http://screen.yahoo.com/"):
        fatal("Doesn't look like a Yahoo! Screen url")

    # Only the beginning of the page is read; the og:video tag is searched
    # for within these bytes.
    with urlopen(page_url) as f:
        page_head = f.read(4096)
    m = re.search(rb'<meta property="og:video" content="([^"]*)"/?>', page_head)
    if m is None:
        fatal("Couldn't find video player url")
    # The attribute value is HTML-escaped; restore the literal ampersands so
    # the query string can be parsed.
    player_url = m.group(1).decode('utf-8', errors='ignore').replace("&amp;", "&")

    q = urllib.parse.parse_qs(urllib.parse.urlparse(player_url).query)
    # parse_qs omits parameters that are absent or blank, so a membership
    # check is enough to guarantee q[name][0] exists below.
    missing = [name for name in ('uuid', 'region', 'onet', 'site') if name not in q]
    if missing:
        fatal("Video player url is missing parameter(s): " + ", ".join(missing))

    query_params = {
        'id': q['uuid'][0],
        'format': "mp4, flv",
        'protocol': "http",  # rtmp,http
        'player_guid': create_guid(),
        'lang': "en-US",
        'region': q['region'][0],
        'onet': q['onet'][0],
        'site': q['site'][0],
    }

    info_url = "http://{API_URL}{API_PATH}?q={query}&env=prod&format={fmt}".format(
        API_URL=API_URL,
        API_PATH=API_PATH,
        query=urllib.parse.quote(QUERY.format(**query_params)),
        fmt="json",  # xml
    )

    with urlopen(info_url) as f:
        info = json.loads(f.read().decode('utf-8'))

    # A response without results may lack keys, contain an empty list, or
    # hold None where a container is expected; treat all of these the same.
    try:
        streams = info['query']['results']['mediaObj'][0]['streams']
        stream = streams[0]
        video_url = stream['host'] + stream['path']
    except (KeyError, IndexError, TypeError):
        fatal("Couldn't find a video stream in the API response")
    print(video_url)
# Run main() only when this file is executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment