Skip to content

Instantly share code, notes, and snippets.

@loony175
Forked from zmwangx/process-youku-appinfo-jsonp
Last active June 16, 2018 07:46
Show Gist options
  • Save loony175/5b24354cd23f26740572b29e1c7c82f9 to your computer and use it in GitHub Desktop.
Save loony175/5b24354cd23f26740572b29e1c7c82f9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# - Open Youku video page with web inspector open, search for appinfo, right
# click on the request, and "copy as cURL";
#
# - In a Linux terminal, run the following command from a fresh directory:
#
# curl ... | python3 -c "$(curl -s https://gist.githubusercontent.com/zmwangx/79f44ea27915a921b9b06e60043a9468/raw/process-youku-appinfo-jsonp)"
#
# where curl ... is the command you copied in the previous step.
import collections
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
# Browser User-Agent string handed to wget (-U) when downloading segments.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'
)
def _extract_jsonp_payload(jsonp_content):
    """Return the text between the parentheses of ``callback(...)``, or None."""
    # re.S lets the payload span several lines; the optional ';' accepts the
    # common ``callback(...);`` form as well.
    m = re.match(r'^\s*\w+\((?P<json>.*)\)\s*;?\s*$', jsonp_content,
                 re.A | re.S)
    return m.group('json') if m else None


def _safe_filename(title):
    """Turn a video title into a name that is valid as a single file name."""
    name = str(title)
    # Path separators (and NUL) cannot be part of a file name.
    for ch in ('/', '\0', os.sep, os.altsep):
        if ch:
            name = name.replace(ch, '_')
    return name or 'output'


def consume_parse_and_process_jsonp():
    """Read an appinfo JSONP response from stdin, then download and merge.

    Steps, all relative to the current directory:

    1. Save the raw response to ``appinfo.jsonp`` and the decoded payload to
       ``appinfo.json`` (both UTF-8).
    2. Print the title and the available streams, pick the widest stream and
       write its segment URLs to ``urls.txt``.
    3. Download the segments with GNU Parallel + wget (``N.mp4``, ``N.log``,
       ``N.done`` for the N-th URL).
    4. Merge the segments into ``<title>.mp4`` with FFmpeg.

    Raises:
        SystemExit: if the input is not JSONP, the response lists no streams,
            or GNU Parallel / FFmpeg is not installed.
        subprocess.CalledProcessError: if parallel or ffmpeg fails.
    """
    jsonp_content = sys.stdin.read().strip()
    # Explicit UTF-8: titles are usually non-ASCII and the locale encoding
    # may not be able to represent them.
    with open('appinfo.jsonp', 'w', encoding='utf-8') as fp:
        fp.write(jsonp_content)

    json_content = _extract_jsonp_payload(jsonp_content)
    if json_content is None:
        # Not an assert: asserts are stripped when Python runs with -O.
        sys.exit('response is not JSONP')
    obj = json.loads(json_content, object_pairs_hook=collections.OrderedDict)
    with open('appinfo.json', 'w', encoding='utf-8') as fp:
        json.dump(obj, fp, ensure_ascii=False, indent=2)

    title = obj['data']['data']['video']['title']
    print('Title: %s' % title)

    sys.stderr.write('Available streams:\n')
    streams = obj['data']['data']['stream']
    for i, s in enumerate(streams):
        print('%d: %s %dx%d' % (i, s['stream_type'], s['width'], s['height']))
    # max() keeps the first of several equally wide streams.
    best_stream = max(streams, key=lambda s: s['width'], default=None)
    if best_stream is None:
        sys.exit('No streams found in response, nothing to download.')

    sys.stderr.write('Saving segment URLs of %s (%dx%d) to urls.txt...\n' %
                     (best_stream['stream_type'], best_stream['width'],
                      best_stream['height']))
    urls = [seg['cdn_url'] for seg in best_stream['segs']]
    with open('urls.txt', 'w', encoding='utf-8') as fp:
        print('\n'.join(urls), file=fp)

    if shutil.which('parallel'):
        sys.stderr.write('Downloading segments with GNU Parallel...\n')
        # {#} is the 1-based job number; N.done marks a finished segment so
        # that a re-run only fetches what is still missing.
        subprocess.check_call([
            'parallel',
            '--bar', '-j0',
            '[[ -f {#}.done ]] || { wget -c -U "%s" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }' % USER_AGENT,
            '::::', 'urls.txt',
        ])
    else:
        sys.exit('GNU Parallel not found, cannot auto-download.')

    if shutil.which('ffmpeg'):
        sys.stderr.write('Merging downloaded segments with FFmpeg...\n')
        outfile = '%s.mp4' % _safe_filename(title)
        fd, concat_spec = tempfile.mkstemp(dir='.')
        try:
            with os.fdopen(fd, 'w') as fp:
                for i in range(1, len(urls) + 1):
                    print('file %d.mp4' % i, file=fp)
            subprocess.check_call([
                'ffmpeg',
                '-fflags', '+genpts',
                '-f', 'concat',
                '-i', concat_spec,
                '-c', 'copy',
                '-movflags', 'faststart',
                # The './' prefix keeps a title that starts with '-' from
                # being read as an ffmpeg option.
                os.path.join(os.curdir, outfile),
            ])
            sys.stderr.write('Merged into "%s".\n' % outfile)
        finally:
            os.unlink(concat_spec)
    else:
        sys.exit('FFmpeg not found, cannot auto-merge.')
def main():
    """Script entry point: process the JSONP response supplied on stdin."""
    consume_parse_and_process_jsonp()


# Run only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()
#!/bin/bash
# Download every URL listed in urls.txt (one per line) with GNU Parallel and
# wget, then join the downloaded segments into output.mp4 with FFmpeg.
#
# Per-segment files, named after the 1-based position N of the URL in urls.txt:
#   N.mp4   downloaded segment
#   N.log   wget log
#   N.done  marker written after a successful download, so a re-run skips it

# Stop at the first failing command instead of merging an incomplete download.
set -euo pipefail

# Exported because GNU Parallel runs each job in a separate shell process; a
# non-exported variable would expand to an empty string there.
export ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"

# -k keeps the output in the same order as urls.txt. "file N.mp4" is printed
# only once segment N is complete; a failed download makes the job, and
# therefore parallel itself, exit non-zero.
parallel --bar -j0 -k '{ [[ -f {#}.done ]] || { wget -c -U "$ua" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }; } && echo "file {#}.mp4"' :::: urls.txt > concat.txt

ffmpeg -fflags +genpts -f concat -i concat.txt -flags +global_header -c copy -movflags faststart output.mp4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment