-
-
Save loony175/5b24354cd23f26740572b29e1c7c82f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# - Open the Youku video page with the web inspector open, search for "appinfo",
#   right-click the matching request, and choose "Copy as cURL";
#
# - In a Linux terminal, run the following command from a fresh directory:
#
#     curl ... | python3 -c "$(curl -s https://gist.githubusercontent.com/zmwangx/79f44ea27915a921b9b06e60043a9468/raw/process-youku-appinfo-jsonp)"
#
#   where curl ... is the command you copied in the previous step.
import collections | |
import json | |
import os | |
import re | |
import shutil | |
import subprocess | |
import sys | |
import tempfile | |
# Browser User-Agent string handed to wget (via -U) when downloading segments.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'
)
def _safe_filename(name):
    """Return *name* with path separators and NUL bytes replaced by '_'."""
    unsafe = {'/', '\0', os.sep}
    if os.altsep:
        unsafe.add(os.altsep)
    return ''.join('_' if ch in unsafe else ch for ch in name)


def consume_parse_and_process_jsonp():
    """Read an appinfo JSONP response from stdin, then download and merge the video.

    Steps, all relative to the current working directory:

    1. Save the raw response to ``appinfo.jsonp`` and the decoded, pretty-printed
       JSON to ``appinfo.json``.
    2. Print the video title and the list of available streams, then pick the
       stream with the largest ``width`` (the first one wins on ties).
    3. Write that stream's segment URLs, one per line, to ``urls.txt``.
    4. Download the segments with GNU Parallel + wget into ``1.mp4``, ``2.mp4``,
       ... (with ``N.log`` logs and ``N.done`` completion markers, so a rerun
       skips finished segments).
    5. Concatenate the segments with FFmpeg into ``<title>.mp4``.

    Raises:
        SystemExit: if the input is not JSONP, no streams are listed, or
            ``parallel`` / ``ffmpeg`` is not on ``PATH``.
        subprocess.CalledProcessError: if ``parallel`` or ``ffmpeg`` fails.
        KeyError, ValueError: if the payload is not valid JSON or lacks the
            expected keys.
    """
    jsonp_content = sys.stdin.read().strip()
    # Explicit UTF-8: the payload may hold non-ASCII text (e.g. the title),
    # which the locale's default encoding may be unable to represent.
    with open('appinfo.jsonp', 'w', encoding='utf-8') as fp:
        fp.write(jsonp_content)

    # Raw string avoids invalid-escape warnings; re.S lets the JSON payload
    # span several lines.
    m = re.match(r'^\s*\w+\((?P<json>.*)\)\s*$', jsonp_content, re.A | re.S)
    if not m:
        # Not an assert: asserts vanish under `python -O`.
        sys.exit('response is not JSONP')
    obj = json.loads(m.group('json'), object_pairs_hook=collections.OrderedDict)
    with open('appinfo.json', 'w', encoding='utf-8') as fp:
        json.dump(obj, fp, ensure_ascii=False, indent=2)

    payload = obj['data']['data']
    title = payload['video']['title']
    print('Title: %s' % title)

    streams = payload['stream']
    if not streams:
        sys.exit('No streams found in appinfo response.')
    sys.stderr.write('Available streams:\n')
    for i, s in enumerate(streams):
        print('%d: %s %dx%d' % (i, s['stream_type'], s['width'], s['height']))
    # max() returns the first maximal element, i.e. the earliest widest stream.
    best_stream = max(streams, key=lambda s: s['width'])

    sys.stderr.write('Saving segment URLs of %s (%dx%d) to urls.txt...\n' %
                     (best_stream['stream_type'], best_stream['width'], best_stream['height']))
    urls = [seg['cdn_url'] for seg in best_stream['segs']]
    with open('urls.txt', 'w', encoding='utf-8') as fp:
        print('\n'.join(urls), file=fp)

    if not shutil.which('parallel'):
        sys.exit('GNU Parallel not found, cannot auto-download.')
    sys.stderr.write('Downloading segments with GNU Parallel...\n')
    subprocess.check_call([
        'parallel',
        '--bar', '-j0',
        # POSIX `[ ... ]` rather than bash-only `[[ ... ]]`: when started from
        # Python, parallel may run jobs under /bin/sh.
        '[ -f {#}.done ] || { wget -c -U "%s" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }' % USER_AGENT,
        '::::', 'urls.txt',
    ])

    if not shutil.which('ffmpeg'):
        sys.exit('FFmpeg not found, cannot auto-merge.')
    sys.stderr.write('Merging downloaded segments with FFmpeg...\n')
    # A path separator in the title would otherwise point into a directory
    # that does not exist.
    outfile = '%s.mp4' % _safe_filename(title)
    # The concat spec lives next to the segments so the relative names resolve.
    fd, concat_spec = tempfile.mkstemp(dir='.')
    try:
        with os.fdopen(fd, 'w') as fp:
            # Segment files are numbered from 1, matching parallel's {#}.
            for i in range(1, len(urls) + 1):
                print('file %d.mp4' % i, file=fp)
        subprocess.check_call([
            'ffmpeg',
            '-fflags', '+genpts',
            '-f', 'concat',
            '-i', concat_spec,
            '-c', 'copy',
            '-movflags', 'faststart',
            outfile,
        ])
        sys.stderr.write('Merged into "%s".\n' % outfile)
    finally:
        os.unlink(concat_spec)
def main():
    """Script entry point: process the JSONP response supplied on stdin."""
    consume_parse_and_process_jsonp()


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download every URL listed in urls.txt in parallel (resumable: a finished
# segment leaves an N.done marker and is skipped on rerun), record the segment
# list in concat.txt, then merge the segments into output.mp4 with FFmpeg.

# Exported so the child shells spawned by GNU Parallel can expand "$ua";
# the job command is single-quoted, so it is NOT expanded by this shell.
# Without the export, wget would receive an empty User-Agent.
export ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"

# -k keeps the echoed "file N.mp4" lines in input order for the concat list.
parallel --bar -j0 -k '[[ -f {#}.done ]] || { wget -c -U "$ua" -o {#}.log -O {#}.mp4 {} && touch {#}.done; };echo "file {#}.mp4"' :::: urls.txt > concat.txt

ffmpeg -fflags +genpts -f concat -i concat.txt -flags +global_header -c copy -movflags faststart output.mp4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment