loony175/process-youku-appinfo-jsonp

## process-youku-appinfo-jsonp
#!/usr/bin/env python3

# - Open a Youku video page with the web inspector open, search for "appinfo",
#   right-click the matching request, and choose "Copy as cURL";
#
# - In a Linux terminal, run the following command from a fresh directory:
#
#       curl ... | python3 -c "$(curl -s https://gist.githubusercontent.com/zmwangx/79f44ea27915a921b9b06e60043a9468/raw/process-youku-appinfo-jsonp)"
#
#   where curl ... is the command you copied in the previous step.

import collections
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile


# User-Agent string handed to wget (-U) for every segment download.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'


def _parse_jsonp(jsonp_content):
    """Decode the JSON payload wrapped in a ``callback(...)`` JSONP response.

    Exits with an error message when the text does not have that form.
    """
    # Raw string avoids invalid-escape warnings; re.S lets the payload span
    # several lines, and a trailing ';' after the call is tolerated.
    m = re.match(r'^\s*\w+\((?P<json>.*)\)\s*;?\s*$', jsonp_content, re.A | re.S)
    if m is None:
        # Explicit exit instead of ``assert``, which ``python -O`` strips.
        sys.exit('response is not JSONP')
    return json.loads(m.group('json'), object_pairs_hook=collections.OrderedDict)


def _pick_widest_stream(streams):
    """Return the first stream with the greatest width, or None if empty."""
    return max(streams, key=lambda s: s['width'], default=None)


def consume_parse_and_process_jsonp():
    """Turn a Youku appinfo JSONP response on stdin into a merged MP4 file.

    Working in the current directory, this:

    1. saves the raw response as ``appinfo.jsonp`` and the decoded payload as
       ``appinfo.json``;
    2. lists the available streams and selects the widest one;
    3. writes that stream's segment URLs to ``urls.txt``;
    4. downloads the segments as ``1.mp4``, ``2.mp4``, ... with GNU Parallel
       and wget, leaving ``<n>.done`` markers so a rerun skips finished ones;
    5. concatenates the segments with FFmpeg into ``<title>.mp4``.

    Raises:
        SystemExit: if the input is not JSONP, no stream or segment is
            available, or ``parallel`` / ``ffmpeg`` is not found on PATH.
        json.JSONDecodeError: if the wrapped payload is not valid JSON.
        KeyError: if the payload lacks one of the expected keys.
        subprocess.CalledProcessError: if ``parallel`` or ``ffmpeg`` fails.
    """
    jsonp_content = sys.stdin.read().strip()
    # Explicit UTF-8: the payload is dumped with ensure_ascii=False, so the
    # locale's default encoding may be unable to represent it.
    with open('appinfo.jsonp', 'w', encoding='utf-8') as fp:
        fp.write(jsonp_content)
    obj = _parse_jsonp(jsonp_content)
    with open('appinfo.json', 'w', encoding='utf-8') as fp:
        json.dump(obj, fp, ensure_ascii=False, indent=2)

    data = obj['data']['data']
    title = data['video']['title']
    print('Title: %s' % title)

    streams = data['stream']
    # The listing goes to stderr together with its header so the two cannot
    # be split apart or reordered when stdout is redirected.
    sys.stderr.write('Available streams:\n')
    for i, s in enumerate(streams):
        sys.stderr.write('%d: %s %dx%d\n' %
                         (i, s['stream_type'], s['width'], s['height']))
    best_stream = _pick_widest_stream(streams)
    if best_stream is None:
        sys.exit('No streams found in response.')

    sys.stderr.write('Saving segment URLs of %s (%dx%d) to urls.txt...\n' %
                     (best_stream['stream_type'], best_stream['width'], best_stream['height']))
    urls = [seg['cdn_url'] for seg in best_stream['segs']]
    if not urls:
        sys.exit('Selected stream has no segments.')
    with open('urls.txt', 'w') as fp:
        print('\n'.join(urls), file=fp)

    if not shutil.which('parallel'):
        sys.exit('GNU Parallel not found, cannot auto-download.')
    sys.stderr.write('Downloading segments with GNU Parallel...\n')
    # {#} is the 1-based job number and {} the URL read from urls.txt.
    subprocess.check_call([
        'parallel',
        '--bar', '-j0',
        '[[ -f {#}.done ]] || { wget -c -U "%s" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }' % USER_AGENT,
        '::::', 'urls.txt',
    ])

    if not shutil.which('ffmpeg'):
        sys.exit('FFmpeg not found, cannot auto-merge.')
    sys.stderr.write('Merging downloaded segments with FFmpeg...\n')
    # A path separator inside the title would point the output file into a
    # directory that does not exist.
    safe_title = title
    for sep in filter(None, (os.sep, os.altsep)):
        safe_title = safe_title.replace(sep, '_')
    outfile = '%s.mp4' % safe_title
    fd, concat_spec = tempfile.mkstemp(dir='.')
    try:
        with os.fdopen(fd, 'w') as fp:
            # Segment files are numbered like the Parallel jobs: 1..len(urls).
            fp.writelines('file %d.mp4\n' % i for i in range(1, len(urls) + 1))
        subprocess.check_call([
            'ffmpeg',
            '-fflags', '+genpts',
            '-f', 'concat',
            '-i', concat_spec,
            '-c', 'copy',
            '-movflags', 'faststart',
            # Anchored to the current directory so a title beginning with '-'
            # is not parsed as an FFmpeg option.
            os.path.join(os.curdir, outfile),
        ])
        sys.stderr.write('Merged into "%s".\n' % outfile)
    finally:
        os.unlink(concat_spec)


def main():
    """Script entry point: delegate to consume_parse_and_process_jsonp()."""
    consume_parse_and_process_jsonp()


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

## youku
#!/bin/bash
# Download every segment URL listed in urls.txt with GNU Parallel + wget, then
# merge the numbered segments into output.mp4 with FFmpeg's concat demuxer.

# Exported because GNU Parallel runs each job in a child shell: a plain shell
# variable is not visible there, so "$ua" would expand to an empty string.
export ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"

# {#} is the job sequence number and {} the URL. A <n>.done marker lets a rerun
# skip finished segments; -k keeps the "file <n>.mp4" lines in input order.
# The echo is chained with && so a failed download shows up in parallel's exit
# status instead of being masked by a succeeding echo; abort before merging.
parallel --bar -j0 -k '{ [[ -f {#}.done ]] || { wget -c -U "$ua" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }; } && echo "file {#}.mp4"' :::: urls.txt > concat.txt || exit 1
ffmpeg -fflags +genpts -f concat -i concat.txt -flags +global_header -c copy -movflags faststart output.mp4
	#!/usr/bin/env python3

	# - Open Youku video page with web inspector open, search for appinfo, right
	# click on the request, and "copy as cURL";
	#
	# - In a Linux terminal, run the following command from a fresh directory:
	#
	#       curl ... | python3 -c "$(curl -s https://gist.githubusercontent.com/zmwangx/79f44ea27915a921b9b06e60043a9468/raw/process-youku-appinfo-jsonp)"
	#
	# where curl ... is the command you copied in the previous step.

	import collections
	import json
	import os
	import re
	import shutil
	import subprocess
	import sys
	import tempfile


	# User-Agent string handed to wget (-U) for every segment download.
	USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134'


	def _parse_jsonp(jsonp_content):
	    """Decode the JSON payload wrapped in a ``callback(...)`` JSONP response.

	    Exits with an error message when the text does not have that form.
	    """
	    # Raw string avoids invalid-escape warnings; re.S lets the payload span
	    # several lines, and a trailing ';' after the call is tolerated.
	    m = re.match(r'^\s*\w+\((?P<json>.*)\)\s*;?\s*$', jsonp_content, re.A | re.S)
	    if m is None:
	        # Explicit exit instead of ``assert``, which ``python -O`` strips.
	        sys.exit('response is not JSONP')
	    return json.loads(m.group('json'), object_pairs_hook=collections.OrderedDict)


	def _pick_widest_stream(streams):
	    """Return the first stream with the greatest width, or None if empty."""
	    return max(streams, key=lambda s: s['width'], default=None)


	def consume_parse_and_process_jsonp():
	    """Turn a Youku appinfo JSONP response on stdin into a merged MP4 file.

	    Working in the current directory, this:

	    1. saves the raw response as ``appinfo.jsonp`` and the decoded payload as
	       ``appinfo.json``;
	    2. lists the available streams and selects the widest one;
	    3. writes that stream's segment URLs to ``urls.txt``;
	    4. downloads the segments as ``1.mp4``, ``2.mp4``, ... with GNU Parallel
	       and wget, leaving ``<n>.done`` markers so a rerun skips finished ones;
	    5. concatenates the segments with FFmpeg into ``<title>.mp4``.

	    Raises:
	        SystemExit: if the input is not JSONP, no stream or segment is
	            available, or ``parallel`` / ``ffmpeg`` is not found on PATH.
	        json.JSONDecodeError: if the wrapped payload is not valid JSON.
	        KeyError: if the payload lacks one of the expected keys.
	        subprocess.CalledProcessError: if ``parallel`` or ``ffmpeg`` fails.
	    """
	    jsonp_content = sys.stdin.read().strip()
	    # Explicit UTF-8: the payload is dumped with ensure_ascii=False, so the
	    # locale's default encoding may be unable to represent it.
	    with open('appinfo.jsonp', 'w', encoding='utf-8') as fp:
	        fp.write(jsonp_content)
	    obj = _parse_jsonp(jsonp_content)
	    with open('appinfo.json', 'w', encoding='utf-8') as fp:
	        json.dump(obj, fp, ensure_ascii=False, indent=2)

	    data = obj['data']['data']
	    title = data['video']['title']
	    print('Title: %s' % title)

	    streams = data['stream']
	    # The listing goes to stderr together with its header so the two cannot
	    # be split apart or reordered when stdout is redirected.
	    sys.stderr.write('Available streams:\n')
	    for i, s in enumerate(streams):
	        sys.stderr.write('%d: %s %dx%d\n' %
	                         (i, s['stream_type'], s['width'], s['height']))
	    best_stream = _pick_widest_stream(streams)
	    if best_stream is None:
	        sys.exit('No streams found in response.')

	    sys.stderr.write('Saving segment URLs of %s (%dx%d) to urls.txt...\n' %
	                     (best_stream['stream_type'], best_stream['width'], best_stream['height']))
	    urls = [seg['cdn_url'] for seg in best_stream['segs']]
	    if not urls:
	        sys.exit('Selected stream has no segments.')
	    with open('urls.txt', 'w') as fp:
	        print('\n'.join(urls), file=fp)

	    if not shutil.which('parallel'):
	        sys.exit('GNU Parallel not found, cannot auto-download.')
	    sys.stderr.write('Downloading segments with GNU Parallel...\n')
	    # {#} is the 1-based job number and {} the URL read from urls.txt.
	    subprocess.check_call([
	        'parallel',
	        '--bar', '-j0',
	        '[[ -f {#}.done ]] || { wget -c -U "%s" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }' % USER_AGENT,
	        '::::', 'urls.txt',
	    ])

	    if not shutil.which('ffmpeg'):
	        sys.exit('FFmpeg not found, cannot auto-merge.')
	    sys.stderr.write('Merging downloaded segments with FFmpeg...\n')
	    # A path separator inside the title would point the output file into a
	    # directory that does not exist.
	    safe_title = title
	    for sep in filter(None, (os.sep, os.altsep)):
	        safe_title = safe_title.replace(sep, '_')
	    outfile = '%s.mp4' % safe_title
	    fd, concat_spec = tempfile.mkstemp(dir='.')
	    try:
	        with os.fdopen(fd, 'w') as fp:
	            # Segment files are numbered like the Parallel jobs: 1..len(urls).
	            fp.writelines('file %d.mp4\n' % i for i in range(1, len(urls) + 1))
	        subprocess.check_call([
	            'ffmpeg',
	            '-fflags', '+genpts',
	            '-f', 'concat',
	            '-i', concat_spec,
	            '-c', 'copy',
	            '-movflags', 'faststart',
	            # Anchored to the current directory so a title beginning with '-'
	            # is not parsed as an FFmpeg option.
	            os.path.join(os.curdir, outfile),
	        ])
	        sys.stderr.write('Merged into "%s".\n' % outfile)
	    finally:
	        os.unlink(concat_spec)


	def main():
	    """Script entry point: delegate to consume_parse_and_process_jsonp()."""
	    consume_parse_and_process_jsonp()


	# Run main() only when this file is executed as a script, not when imported.
	if __name__ == '__main__':
	    main()
	#!/bin/bash
	# Download every segment URL listed in urls.txt with GNU Parallel + wget, then
	# merge the numbered segments into output.mp4 with FFmpeg's concat demuxer.

	# Exported because GNU Parallel runs each job in a child shell: a plain shell
	# variable is not visible there, so "$ua" would expand to an empty string.
	export ua="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"

	# {#} is the job sequence number and {} the URL. A <n>.done marker lets a rerun
	# skip finished segments; -k keeps the "file <n>.mp4" lines in input order.
	# The echo is chained with && so a failed download shows up in parallel's exit
	# status instead of being masked by a succeeding echo; abort before merging.
	parallel --bar -j0 -k '{ [[ -f {#}.done ]] || { wget -c -U "$ua" -o {#}.log -O {#}.mp4 {} && touch {#}.done; }; } && echo "file {#}.mp4"' :::: urls.txt > concat.txt || exit 1
	ffmpeg -fflags +genpts -f concat -i concat.txt -flags +global_header -c copy -movflags faststart output.mp4