# vangheem/tablo-export.py

## tablo-export.py
#
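# Requirements:
#  - Python >= 3.6 (the script uses f-strings)
#  - requests
#  - aiohttp
#  - lxml and cssselect (used to parse the Tablo directory listings)
#  - an `ffmpeg` executable on the PATH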
#
import argparse
import asyncio
import json
import os
import shutil
import tempfile

import aiohttp
import requests
from lxml.html import fromstring

# Command-line interface: output directory, address of the Tablo, and the
# number of recordings processed at the same time.
parser = argparse.ArgumentParser(description='Download all tablo videos.')
parser.add_argument('--output', default='./videos')
parser.add_argument('--ip', default='192.168.1.43')
parser.add_argument('--concurrency', default=4, type=int)
args = parser.parse_args()


# Module-wide settings derived from the parsed arguments.
CONCURRENCY = args.concurrency
# Divided by CONCURRENCY in download() to size each batch of segment requests.
MAX_SIMULTANEOUS_DOWNLOAD = 32
OUTPUT = args.output
ENDPOINT = f'http://{args.ip}:18080'


def get_videos():
    """Return the ids of the recordings listed on the Tablo's ``/pvr`` page.

    Every ``tr td.n a`` link of the listing is inspected; only link texts
    made up entirely of digits are kept, in page order.
    """
    listing = fromstring(requests.get(f'{ENDPOINT}/pvr').content)
    names = (link.text_content().strip()
             for link in listing.cssselect('tr td.n a'))
    return [name for name in names if name.isdigit()]


def get_meta(video_id):
    """Fetch and decode the ``meta.txt`` JSON document of one recording.

    Args:
        video_id: id of the recording, as listed under ``/pvr``.

    Returns:
        The decoded JSON content of ``meta.txt``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/meta.txt')
    # Report the HTTP status rather than an opaque JSON decoding error when
    # the server returns an error page instead of the metadata.
    resp.raise_for_status()
    return resp.json()


async def download_segment(session, output_filename, video_id, seg_name):
    """Fetch one segment of a recording and return its raw bytes.

    Args:
        session: aiohttp client session used to issue the request.
        output_filename: name of the recording; only used in the log line.
        video_id: id of the recording the segment belongs to.
        seg_name: name of the segment as shown in the ``segs`` listing.

    Returns:
        The body of the segment response as ``bytes``.
    """
    print(f'download segment {seg_name} for {output_filename}')
    url = f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}'
    # The context manager releases the connection even when reading the
    # body fails or the task is cancelled.
    async with session.get(url) as resp:
        return await resp.read()


class Show:
    """Convenience accessors over one recording's ``meta.txt`` dictionary.

    Most lookups try the ``jsonFromTribune`` program record first and fall
    back to the ``jsonForClient`` records when a key is missing.
    """

    def __init__(self, meta):
        # Parsed meta.txt JSON of a single recording.
        self.meta = meta

    @staticmethod
    def _first_found(*lookups):
        """Return the result of the first lookup that raises no ``KeyError``.

        The last lookup is not guarded: its ``KeyError`` reaches the caller.
        """
        *guarded, final = lookups
        for lookup in guarded:
            try:
                return lookup()
            except KeyError:
                pass
        return final()

    @staticmethod
    def _two_digits(value):
        """Render *value* as text, zero-padded on the left to two characters."""
        return str(value).zfill(2)

    @property
    def program(self):
        """``program`` entry of the episode's ``jsonFromTribune`` record."""
        tribune = self.meta['recEpisode']['jsonFromTribune']
        return tribune['program']

    @property
    def series(self):
        """``jsonForClient`` record of ``recSeries``."""
        return self.meta['recSeries']['jsonForClient']

    @property
    def season(self):
        """``jsonForClient`` record of ``recSeason``."""
        return self.meta['recSeason']['jsonForClient']

    @property
    def episode(self):
        """``jsonForClient`` record of ``recEpisode``."""
        return self.meta['recEpisode']['jsonForClient']

    @property
    def episode_title(self):
        """Episode title from the program, else the season, else the episode record."""
        return self._first_found(
            lambda: self.program['episodeTitle'],
            lambda: self.season['episodeTitle'],
            lambda: self.episode['title'],
        )

    @property
    def title(self):
        """Title from the program record, else from the series record."""
        return self._first_found(
            lambda: self.program['title'],
            lambda: self.series['title'],
        )

    @property
    def episode_number(self):
        """Two-character episode number (program record, else episode record)."""
        number = self._first_found(
            lambda: self.program['episodeNum'],
            lambda: self.episode['episodeNumber'],
        )
        return self._two_digits(number)

    @property
    def season_number(self):
        """Two-character season number (program record, else season record)."""
        number = self._first_found(
            lambda: self.program['seasonNum'],
            lambda: self.season['seasonNumber'],
        )
        return self._two_digits(number)

    @property
    def identifier(self):
        """``sXXeYY`` tag when season and episode are valid, else the program's ``seriesId``."""
        if not self.valid_season:
            return self.program['seriesId']
        return f's{self.season_number}e{self.episode_number}'

    @property
    def valid_season(self):
        """False when the season number or the episode number is ``'00'``."""
        return not (self.season_number == '00' or self.episode_number == '00')

    @property
    def is_movie(self):
        """True without a valid season/episode and without ``jsonFromTribune`` data."""
        if self.valid_season:
            return False
        return 'jsonFromTribune' not in self.meta['recEpisode']

    @property
    def year(self):
        """Text before the first ``-`` of the series' ``originalAirDate``."""
        return self.series['originalAirDate'].partition('-')[0]


def _safe_path_component(name):
    """Return *name* with path separators replaced by ``-``."""
    for sep in (os.sep, os.altsep):
        if sep:
            name = name.replace(sep, '-')
    return name


async def _append_segments(ts_filepath, batch):
    """Await the downloads in *batch* and append their bytes, in order, to *ts_filepath*."""
    chunks = await asyncio.gather(*batch)
    with open(ts_filepath, 'ab') as fi:
        for file_chunk in chunks:
            fi.write(file_chunk)


async def download(video_id):
    """Download one recording and convert it to an ``.mp4`` below ``OUTPUT``.

    The recording's metadata is saved under ``OUTPUT/meta``. Its segments
    are fetched in batches, appended to a temporary ``.ts`` file and handed
    to ``ffmpeg``. A recording whose ``.mp4`` already exists is skipped.

    Args:
        video_id: id of the recording, as listed under ``/pvr``.
    """
    meta = get_meta(video_id)
    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
    dom = fromstring(resp.content)
    show = Show(meta)

    # Titles are sanitised because a path separator inside one would be
    # taken as a directory boundary and break the file paths built below.
    if show.is_movie:
        output_dir = os.path.join(OUTPUT, 'Movies')
        output_filename = _safe_path_component(f"{show.title} ({show.year})")
    else:
        output_filename = _safe_path_component(
            f"{show.title} - {show.identifier} - {show.episode_title}")
        series_dir = os.path.join(OUTPUT, 'TV Shows',
                                  _safe_path_component(show.title))
        if show.valid_season:
            output_dir = os.path.join(series_dir,
                                      f"Season {show.season_number}")
        else:
            output_dir = series_dir

    os.makedirs(output_dir, exist_ok=True)
    output_filepath = f'{output_dir}/{output_filename}.mp4'

    meta_dir = os.path.join(OUTPUT, 'meta')
    os.makedirs(meta_dir, exist_ok=True)
    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
    with open(meta_filepath, 'w') as fi:
        fi.write(json.dumps(meta))

    if os.path.exists(output_filepath):
        print(f'Skipping {output_filepath}, already downloaded')
        return

    segments = dom.cssselect('tr td.n')
    # Share the global request budget between the recordings processed in
    # parallel; always fetch at least one segment per batch.
    batch_size = max(1, MAX_SIMULTANEOUS_DOWNLOAD // max(1, CONCURRENCY))

    # The temporary directory is created only once work is certain, and is
    # removed again whatever happens below.
    tmp_dir = tempfile.mkdtemp()
    try:
        ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
        async with aiohttp.ClientSession() as session:
            batch = []
            for count, seg in enumerate(segments, start=1):
                seg_name = seg.text_content()
                if seg_name in ('Parent Directory/',):
                    continue
                batch.append(download_segment(
                    session, output_filename, video_id, seg_name))

                if len(batch) >= batch_size:
                    await _append_segments(ts_filepath, batch)
                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
                    batch = []
            await _append_segments(ts_filepath, batch)

        # NOTE(review): '-c copy' comes last, so ffmpeg presumably copies the
        # streams and ignores the codec options before it - confirm.
        cmd = [
            'ffmpeg', '-y', '-i', ts_filepath,
            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
            '-c', 'copy', f'{output_filepath}']
        print(f'Executing: {" ".join(cmd)}')
        process = await asyncio.create_subprocess_exec(
            *cmd, stdout=asyncio.subprocess.PIPE)
        await process.communicate()
        if process.returncode != 0:
            # Remove any partial output so that the next run does not
            # mistake it for a finished download and skip the recording.
            print(f'ffmpeg failed with exit code {process.returncode} for {output_filepath}')
            if os.path.exists(output_filepath):
                os.remove(output_filepath)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


async def download_all():
    """Download every recording, awaiting them in groups of ``CONCURRENCY``."""
    pending = []
    for video_id in get_videos():
        pending.append(download(video_id))
        if len(pending) < CONCURRENCY:
            continue
        # The group is full: wait for all of it before queueing more.
        await asyncio.gather(*pending)
        pending = []
    # Whatever is left over from the last, incomplete group.
    await asyncio.gather(*pending)


if __name__ == '__main__':
    # Create the loop explicitly: relying on asyncio.get_event_loop() to
    # create one outside of a coroutine is deprecated. Closing it afterwards
    # releases its resources.
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        event_loop.run_until_complete(download_all())
    finally:
        event_loop.close()
	#
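	# Requirements:
	# - Python >= 3.6 (the script uses f-strings)
	# - requests
	# - aiohttp
	# - lxml and cssselect (used to parse the Tablo directory listings)
	# - an `ffmpeg` executable on the PATH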
	#
	import argparse
	import asyncio
	import json
	import os
	import shutil
	import tempfile

	import aiohttp
	import requests
	from lxml.html import fromstring

	# Command-line interface: output directory, address of the Tablo, and the
	# number of recordings processed at the same time.
	parser = argparse.ArgumentParser(description='Download all tablo videos.')
	parser.add_argument('--output', default='./videos')
	parser.add_argument('--ip', default='192.168.1.43')
	parser.add_argument('--concurrency', default=4, type=int)
	args = parser.parse_args()


	# Module-wide settings derived from the parsed arguments.
	CONCURRENCY = args.concurrency
	# Divided by CONCURRENCY in download() to size each batch of segment requests.
	MAX_SIMULTANEOUS_DOWNLOAD = 32
	OUTPUT = args.output
	ENDPOINT = f'http://{args.ip}:18080'


	def get_videos():
	    """Return the ids of the recordings listed on the Tablo's ``/pvr`` page.

	    Every ``tr td.n a`` link of the listing is inspected; only link texts
	    made up entirely of digits are kept, in page order.
	    """
	    listing = fromstring(requests.get(f'{ENDPOINT}/pvr').content)
	    names = (link.text_content().strip()
	             for link in listing.cssselect('tr td.n a'))
	    return [name for name in names if name.isdigit()]


	def get_meta(video_id):
	    """Fetch and decode the ``meta.txt`` JSON document of one recording.

	    Args:
	        video_id: id of the recording, as listed under ``/pvr``.

	    Returns:
	        The decoded JSON content of ``meta.txt``.

	    Raises:
	        requests.HTTPError: if the server answers with an error status.
	    """
	    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/meta.txt')
	    # Report the HTTP status rather than an opaque JSON decoding error when
	    # the server returns an error page instead of the metadata.
	    resp.raise_for_status()
	    return resp.json()


	async def download_segment(session, output_filename, video_id, seg_name):
	    """Fetch one segment of a recording and return its raw bytes.

	    Args:
	        session: aiohttp client session used to issue the request.
	        output_filename: name of the recording; only used in the log line.
	        video_id: id of the recording the segment belongs to.
	        seg_name: name of the segment as shown in the ``segs`` listing.

	    Returns:
	        The body of the segment response as ``bytes``.
	    """
	    print(f'download segment {seg_name} for {output_filename}')
	    url = f'{ENDPOINT}/pvr/{video_id}/segs/{seg_name}'
	    # The context manager releases the connection even when reading the
	    # body fails or the task is cancelled.
	    async with session.get(url) as resp:
	        return await resp.read()


	class Show:
	    """Convenience accessors over one recording's ``meta.txt`` dictionary.

	    Most lookups try the ``jsonFromTribune`` program record first and fall
	    back to the ``jsonForClient`` records when a key is missing.
	    """

	    def __init__(self, meta):
	        # Parsed meta.txt JSON of a single recording.
	        self.meta = meta

	    @staticmethod
	    def _first_found(*lookups):
	        """Return the result of the first lookup that raises no ``KeyError``.

	        The last lookup is not guarded: its ``KeyError`` reaches the caller.
	        """
	        *guarded, final = lookups
	        for lookup in guarded:
	            try:
	                return lookup()
	            except KeyError:
	                pass
	        return final()

	    @staticmethod
	    def _two_digits(value):
	        """Render *value* as text, zero-padded on the left to two characters."""
	        return str(value).zfill(2)

	    @property
	    def program(self):
	        """``program`` entry of the episode's ``jsonFromTribune`` record."""
	        tribune = self.meta['recEpisode']['jsonFromTribune']
	        return tribune['program']

	    @property
	    def series(self):
	        """``jsonForClient`` record of ``recSeries``."""
	        return self.meta['recSeries']['jsonForClient']

	    @property
	    def season(self):
	        """``jsonForClient`` record of ``recSeason``."""
	        return self.meta['recSeason']['jsonForClient']

	    @property
	    def episode(self):
	        """``jsonForClient`` record of ``recEpisode``."""
	        return self.meta['recEpisode']['jsonForClient']

	    @property
	    def episode_title(self):
	        """Episode title from the program, else the season, else the episode record."""
	        return self._first_found(
	            lambda: self.program['episodeTitle'],
	            lambda: self.season['episodeTitle'],
	            lambda: self.episode['title'],
	        )

	    @property
	    def title(self):
	        """Title from the program record, else from the series record."""
	        return self._first_found(
	            lambda: self.program['title'],
	            lambda: self.series['title'],
	        )

	    @property
	    def episode_number(self):
	        """Two-character episode number (program record, else episode record)."""
	        number = self._first_found(
	            lambda: self.program['episodeNum'],
	            lambda: self.episode['episodeNumber'],
	        )
	        return self._two_digits(number)

	    @property
	    def season_number(self):
	        """Two-character season number (program record, else season record)."""
	        number = self._first_found(
	            lambda: self.program['seasonNum'],
	            lambda: self.season['seasonNumber'],
	        )
	        return self._two_digits(number)

	    @property
	    def identifier(self):
	        """``sXXeYY`` tag when season and episode are valid, else the program's ``seriesId``."""
	        if not self.valid_season:
	            return self.program['seriesId']
	        return f's{self.season_number}e{self.episode_number}'

	    @property
	    def valid_season(self):
	        """False when the season number or the episode number is ``'00'``."""
	        return not (self.season_number == '00' or self.episode_number == '00')

	    @property
	    def is_movie(self):
	        """True without a valid season/episode and without ``jsonFromTribune`` data."""
	        if self.valid_season:
	            return False
	        return 'jsonFromTribune' not in self.meta['recEpisode']

	    @property
	    def year(self):
	        """Text before the first ``-`` of the series' ``originalAirDate``."""
	        return self.series['originalAirDate'].partition('-')[0]


	def _safe_path_component(name):
	    """Return *name* with path separators replaced by ``-``."""
	    for sep in (os.sep, os.altsep):
	        if sep:
	            name = name.replace(sep, '-')
	    return name


	async def _append_segments(ts_filepath, batch):
	    """Await the downloads in *batch* and append their bytes, in order, to *ts_filepath*."""
	    chunks = await asyncio.gather(*batch)
	    with open(ts_filepath, 'ab') as fi:
	        for file_chunk in chunks:
	            fi.write(file_chunk)


	async def download(video_id):
	    """Download one recording and convert it to an ``.mp4`` below ``OUTPUT``.

	    The recording's metadata is saved under ``OUTPUT/meta``. Its segments
	    are fetched in batches, appended to a temporary ``.ts`` file and handed
	    to ``ffmpeg``. A recording whose ``.mp4`` already exists is skipped.

	    Args:
	        video_id: id of the recording, as listed under ``/pvr``.
	    """
	    meta = get_meta(video_id)
	    resp = requests.get(f'{ENDPOINT}/pvr/{video_id}/segs')
	    dom = fromstring(resp.content)
	    show = Show(meta)

	    # Titles are sanitised because a path separator inside one would be
	    # taken as a directory boundary and break the file paths built below.
	    if show.is_movie:
	        output_dir = os.path.join(OUTPUT, 'Movies')
	        output_filename = _safe_path_component(f"{show.title} ({show.year})")
	    else:
	        output_filename = _safe_path_component(
	            f"{show.title} - {show.identifier} - {show.episode_title}")
	        series_dir = os.path.join(OUTPUT, 'TV Shows',
	                                  _safe_path_component(show.title))
	        if show.valid_season:
	            output_dir = os.path.join(series_dir,
	                                      f"Season {show.season_number}")
	        else:
	            output_dir = series_dir

	    os.makedirs(output_dir, exist_ok=True)
	    output_filepath = f'{output_dir}/{output_filename}.mp4'

	    meta_dir = os.path.join(OUTPUT, 'meta')
	    os.makedirs(meta_dir, exist_ok=True)
	    meta_filepath = os.path.join(meta_dir, output_filename + '.json')
	    with open(meta_filepath, 'w') as fi:
	        fi.write(json.dumps(meta))

	    if os.path.exists(output_filepath):
	        print(f'Skipping {output_filepath}, already downloaded')
	        return

	    segments = dom.cssselect('tr td.n')
	    # Share the global request budget between the recordings processed in
	    # parallel; always fetch at least one segment per batch.
	    batch_size = max(1, MAX_SIMULTANEOUS_DOWNLOAD // max(1, CONCURRENCY))

	    # The temporary directory is created only once work is certain, and is
	    # removed again whatever happens below.
	    tmp_dir = tempfile.mkdtemp()
	    try:
	        ts_filepath = os.path.join(tmp_dir, output_filename + '.ts')
	        async with aiohttp.ClientSession() as session:
	            batch = []
	            for count, seg in enumerate(segments, start=1):
	                seg_name = seg.text_content()
	                if seg_name in ('Parent Directory/',):
	                    continue
	                batch.append(download_segment(
	                    session, output_filename, video_id, seg_name))

	                if len(batch) >= batch_size:
	                    await _append_segments(ts_filepath, batch)
	                    print(f'Downloaded ({count}/{len(segments)}) of {output_filename}')
	                    batch = []
	            await _append_segments(ts_filepath, batch)

	        # NOTE(review): '-c copy' comes last, so ffmpeg presumably copies the
	        # streams and ignores the codec options before it - confirm.
	        cmd = [
	            'ffmpeg', '-y', '-i', ts_filepath,
	            '-vcodec', 'h264', '-acodec', 'aac', '-strict', '-2',
	            '-c', 'copy', f'{output_filepath}']
	        print(f'Executing: {" ".join(cmd)}')
	        process = await asyncio.create_subprocess_exec(
	            *cmd, stdout=asyncio.subprocess.PIPE)
	        await process.communicate()
	        if process.returncode != 0:
	            # Remove any partial output so that the next run does not
	            # mistake it for a finished download and skip the recording.
	            print(f'ffmpeg failed with exit code {process.returncode} for {output_filepath}')
	            if os.path.exists(output_filepath):
	                os.remove(output_filepath)
	    finally:
	        shutil.rmtree(tmp_dir, ignore_errors=True)


	async def download_all():
	    """Download every recording, awaiting them in groups of ``CONCURRENCY``."""
	    pending = []
	    for video_id in get_videos():
	        pending.append(download(video_id))
	        if len(pending) < CONCURRENCY:
	            continue
	        # The group is full: wait for all of it before queueing more.
	        await asyncio.gather(*pending)
	        pending = []
	    # Whatever is left over from the last, incomplete group.
	    await asyncio.gather(*pending)


	if __name__ == '__main__':
	    # Create the loop explicitly: relying on asyncio.get_event_loop() to
	    # create one outside of a coroutine is deprecated. Closing it afterwards
	    # releases its resources.
	    event_loop = asyncio.new_event_loop()
	    asyncio.set_event_loop(event_loop)
	    try:
	        event_loop.run_until_complete(download_all())
	    finally:
	        event_loop.close()