Skip to content

Instantly share code, notes, and snippets.

@jleclanche
Last active August 29, 2015 14:05
Show Gist options
  • Save jleclanche/1b0cd622fabb05b2ced9 to your computer and use it in GitHub Desktop.
Save jleclanche/1b0cd622fabb05b2ced9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import json
import logging
import os
import re
import string
import sys
import requests
from datetime import datetime
from slugify import slugify
# Root of the Twitch API; main() prefixes the channel video listing path with it.
TWITCH_API_ROOT = "https://api.twitch.tv"
# Root of the API that download_broadcast() queries for a broadcast's chunk list.
BASE_URL = "https://api.justin.tv"
# %-style template for the channel video listing path; expects a "channel" key.
TWITCH_API_CHANNEL_VIDEO_PATH = "/kraken/channels/%(channel)s/videos"
# Page size for the listing request; main() also advances "offset" by this amount.
BDSC_API_LIMIT = 100
def _prep_dir_for(filename):
    """Ensure that the directory which will contain *filename* exists.

    Missing intermediate directories are created as well. A filename without
    a directory component refers to the current directory, so there is
    nothing to create in that case.
    """
    dirname = os.path.dirname(filename)
    # os.path.dirname() yields "" for a bare filename and os.makedirs("")
    # raises, so an empty directory part must be skipped explicitly.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
def download_file(url, local_filename):
    """Stream *url* to *local_filename*, printing a running MB counter.

    The body is first written to ``local_filename + ".part"`` and only moved
    to its final name once the transfer has completed, so an interrupted
    download never leaves a truncated file under the final name.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    logging.info("downloading {0}".format(local_filename))
    chunk_size = 1024
    done = 0
    partial_filename = local_filename + ".part"
    r = requests.get(url, stream=True)
    try:
        # Fail before touching the disk instead of saving an error page
        # as if it were the video.
        r.raise_for_status()
        with open(partial_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                f.write(chunk)
                # Count the bytes actually received; the final chunk is
                # usually shorter than chunk_size.
                done += len(chunk)
                sys.stdout.write(
                    "\r{0:>7.2f} MB".format(done / float(1024 ** 2)))
                sys.stdout.flush()
    finally:
        # Release the streamed connection even when the transfer fails.
        r.close()
    # os.rename() refuses to overwrite on some platforms; drop any stale
    # target first so the result matches the old open(..., 'wb') behaviour.
    if os.path.exists(local_filename):
        os.remove(local_filename)
    os.rename(partial_filename, local_filename)
    logging.info("done\n")
def download_broadcast(channel, id_):
    """Download every video chunk of one archived broadcast.

    Asks the ``by_archive`` endpoint under ``BASE_URL`` for the chunk list of
    broadcast *id_* and saves each chunk, ordered by ``start_timestamp``, as
    ``<channel>/<slug>/<NN><ext>``, where the slug is built from *id_* and
    the title of the first chunk returned. Chunks whose target file already
    exists are skipped.

    Raises:
        Exception: if the API answers with a non-200 status code.
        SystemExit: if the response body is not valid JSON.
    """
    pattern = '{base}/api/broadcast/by_archive/{id_}.json?onsite=true'
    url = pattern.format(base=BASE_URL, id_=id_)
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request; confirm that this is still required.
    r = requests.get(url, verify=False)
    if r.status_code != 200:
        raise Exception("API returned {0}".format(r.status_code))
    try:
        chunks = r.json()
    except ValueError as e:
        logging.error("API did not return valid JSON: {}".format(e))
        logging.error("{}".format(r.text))
        # quit() is only injected by the site module and exits with status
        # 0; sys.exit(1) is always available and signals the failure.
        sys.exit(1)
    if not chunks:
        # Nothing to name the directory after and nothing to download.
        logging.warning("No chunks returned for broadcast {0}".format(id_))
        return
    label = slugify(id_ + "-" + chunks[0]['title'])
    ordered = sorted(chunks, key=lambda x: x['start_timestamp'])
    for nr, chunk in enumerate(ordered):
        video_url = chunk['video_file_url']
        ext = os.path.splitext(video_url)[1]
        filename = os.path.join(channel, label, "{0:0>2}{1}".format(nr, ext))
        _prep_dir_for(filename)
        if not os.path.exists(filename):
            download_file(video_url, filename)
def main():
    """Fetch a channel's broadcast list, save it as JSON, then download it.

    The channel name is taken from the first command-line argument. The
    video listing is paged through ``BDSC_API_LIMIT`` entries at a time until
    a page is empty or cannot be parsed. The collected metadata is written to
    ``<channel>-<timestamp>.json`` in the current directory, after which each
    broadcast is downloaded in turn. The first failing download is logged and
    re-raised, which stops the run.

    Raises:
        SystemExit: if no channel name was given on the command line.
    """
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)
    if len(sys.argv) < 2:
        # Exit with a usage hint rather than an IndexError traceback.
        sys.exit("usage: {0} <channel>".format(sys.argv[0]))
    channel = sys.argv[1].lower()
    path = TWITCH_API_ROOT + TWITCH_API_CHANNEL_VIDEO_PATH % {"channel": channel}
    params = {"limit": BDSC_API_LIMIT, "offset": 0, "on_site": 1, "broadcasts": "true"}
    videos = []
    timestamp = datetime.now()
    while True:
        logging.info("Querying %r (%r)" % (path, params))
        r = requests.get(path, params=params, verify=False)
        params["offset"] += BDSC_API_LIMIT
        try:
            data = r.json()
        except ValueError as e:
            # The message needs a placeholder for its argument; without one
            # logging reports a formatting error instead of the warning.
            logging.warning("Received malformed data: %s", e)
            break
        if "videos" not in data or not data["videos"]:
            logging.info("No more videos (%r). Stopping." % (data))
            break
        videos += data["videos"]
    fname = "%s-%s.json" % (channel, timestamp.isoformat())
    with open(fname, "w") as f:
        json.dump(videos, f)
    logging.info("Saved metadata to %r" % (fname))
    for video in videos:
        # Drop the first character of "_id" before using it as the archive id.
        video_id = video["_id"][1:]
        try:
            download_broadcast(channel, video_id)
        except Exception as e:
            logging.error("Could not download video id %r: %r" % (video_id, e))
            raise
# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment