nmante/main.py

## main.py
#!/usr/local/bin/python3

import boto3
import json
import logging
import subprocess as sp
import argparse
import os

def create_parser():
    """Build the command-line parser for the caption conversion script.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--config``,
        ``--captions_info``, ``--execute``, ``--output_dir`` and
        ``--remove_files`` (with short forms ``-c``, ``-i``, ``-e``, ``-o``,
        ``-r``).
    """
    cli = argparse.ArgumentParser(
        description='Use ffmpeg to convert .sub caption files to .srt files, and upload to s3'
    )

    # (flags, keyword settings) pairs, registered in the order they should
    # appear in the generated help text.
    option_specs = (
        (
            ('-c', '--config'),
            {'default': 'config.json', 'help': 'JSON Configuration file with s3 access keys'},
        ),
        (
            ('-i', '--captions_info'),
            {
                'default': 'captions_info.json',
                'help': 'JSON file containing caption key/filenames on s3',
            },
        ),
        (
            ('-e', '--execute'),
            {
                'action': 'store_true',
                'help': 'Set this flag to actually update the files on s3. If this is not set, s3 files will NOT be updated',
            },
        ),
        (
            ('-o', '--output_dir'),
            {
                'default': '/tmp',
                'help': 'The output directory to store the converted caption files. Directory must exist',
            },
        ),
        (
            ('-r', '--remove_files'),
            {
                'action': 'store_true',
                'help': 'Set this flag to remove the converted caption files from your system',
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli

class CaptionFixer:
    """Convert caption files referenced on s3 with ffmpeg and optionally re-upload them.

    Construction loads the credentials file and the caption list, then builds
    the boto3 s3 client.  ``execute`` walks the caption list, converts each
    file into ``output_dir`` and, depending on the flags, uploads the result
    back to its original key and/or deletes the local copy.
    """

    # Must match the keys _create_s3_client reads from the credentials dict.
    _REQUIRED_CREDENTIAL_KEYS = ('aws_access_key_id', 'aws_secret_access_key')
    # Keys every caption record needs before it is accepted.
    _REQUIRED_ASSET_KEYS = ('filename', 'storage_path')

    def __init__(
        self,
        should_execute_updates=False,
        credentials_filename='config.json',
        json_captions_file='captions_info.json',
        s3_url='https://s3.amazonaws.com/video-api-prod',
        output_dir='/tmp',
        should_remove_files=False
    ):
        """Load configuration and prepare the s3 client.

        Args:
            should_execute_updates: when true, converted files are uploaded to s3.
            credentials_filename: JSON file holding ``aws_access_key_id`` and
                ``aws_secret_access_key``.
            json_captions_file: file with one JSON caption record per line.
            s3_url: base URL the caption download URLs are built from.
            output_dir: directory the converted files are written to.
            should_remove_files: when true, converted files are deleted after
                processing.

        Raises:
            ValueError: if the credentials file lacks a required key.
        """
        credentials = self._validate_and_load_credentials(credentials_filename)
        transcript_assets_info = self._validate_and_load_transcript_assets(json_captions_file)
        self._credentials = credentials
        self._transcript_assets_info = transcript_assets_info
        self._S3Client = self._create_s3_client(credentials)
        self._should_execute_updates = should_execute_updates
        self._url = s3_url
        self._output_captions_dir = output_dir
        self._should_remove_files = should_remove_files

    def _create_s3_client(self, credentials):
        """Return a boto3 s3 client built from *credentials*, or None on failure."""
        try:
            return boto3.client(
                's3',
                aws_access_key_id=credentials['aws_access_key_id'],
                aws_secret_access_key=credentials['aws_secret_access_key'],
            )
        except Exception:
            # Best effort: the failure is logged and later uploads are refused
            # by _update_previous_sub_file because the client is None.
            logging.exception('Could not initialize s3 boto client')
            return None

    def _validate_and_load_credentials(self, credentials_filename):
        """Read the JSON credentials file and check it has the required keys.

        Returns:
            The credentials as a dict.

        Raises:
            ValueError: if any required key is absent.
        """
        with open(credentials_filename) as f:
            credentials = dict(json.load(f))

        missing_keys = [key for key in self._REQUIRED_CREDENTIAL_KEYS if key not in credentials]
        if missing_keys:
            message = 'Must have required keys in config file {}; missing {}'.format(
                list(self._REQUIRED_CREDENTIAL_KEYS), missing_keys
            )
            logging.error(message)
            raise ValueError(message)

        return credentials

    def _validate_and_load_transcript_assets(self, json_captions_file):
        """Read caption records, one JSON document per line.

        Blank lines are ignored.  Records lacking ``filename`` or
        ``storage_path`` are logged with their line number and skipped.

        Returns:
            The list of accepted records, in file order.
        """
        transcript_assets = []

        with open(json_captions_file) as f:
            for line_number, line in enumerate(f, start=1):
                if not line.strip():
                    continue

                transcript_asset = json.loads(line)
                missing_keys = [key for key in self._REQUIRED_ASSET_KEYS if key not in transcript_asset]
                if missing_keys:
                    # .get(): the record being rejected may lack 'id' as well.
                    logging.error(
                        'Transcript asset %s missing required keys %s. Line number %s',
                        transcript_asset.get('id'), missing_keys, line_number
                    )
                    continue

                transcript_assets.append(transcript_asset)

        return transcript_assets

    def _convert_sub_caption_to_srt(self, storage_path, bucket='assets'):
        """Run ffmpeg on the caption at *storage_path* and store the result locally.

        The input is ``<s3_url>/<bucket>/<storage_path>``; the output file is
        placed in the output directory, named after *storage_path* with ``/``
        replaced by ``-``.

        Returns:
            The output filename, or None when ffmpeg could not be started or
            exited with a non-zero status.
        """
        url = '{}/{}/{}'.format(self._url, bucket, storage_path)
        tmp_caption_filename = os.path.join(
            self._output_captions_dir, storage_path.replace('/', '-')
        )
        command = ['ffmpeg', '-i', url, tmp_caption_filename]

        try:
            with sp.Popen(command, stdout=sp.PIPE) as process:
                output, _ = process.communicate()
        except Exception:
            logging.exception('Could not convert caption file %s', storage_path)
            return None

        # A started-but-failed ffmpeg must not be reported as a conversion.
        if process.returncode != 0:
            logging.error(
                'Could not convert caption file %s (ffmpeg exit status %s)',
                storage_path, process.returncode
            )
            return None

        logging.info('Converted caption file %s. Stored at %s', storage_path, tmp_caption_filename)
        logging.info(output)
        return tmp_caption_filename

    def _update_previous_sub_file(self, tmp_caption_filename, storage_path, bucket='assets'):
        """Upload the converted file to s3 under *storage_path*.

        Returns:
            True when the upload call completed, False when the client is
            unavailable, the file cannot be read, or the upload raised.
        """
        if self._S3Client is None:
            logging.error('Could not update caption file %s: no s3 client', storage_path)
            return False

        try:
            with open(tmp_caption_filename, 'rb') as f:
                caption_bytes = f.read()
            logging.info(caption_bytes)
            # Send the bytes, not the file handle: the handle is at EOF after
            # being read for logging, which would upload an empty object.
            self._S3Client.put_object(Body=caption_bytes, Bucket=bucket, Key=storage_path)
        except Exception:
            logging.exception('Could not update caption file %s', storage_path)
            return False

        return True

    def execute(self):
        """Convert every loaded caption, then upload and/or delete it as configured."""
        for transcript_asset in self._transcript_assets_info:
            # Removes every occurrence of 'assets/' from the stored path; the
            # bucket name is supplied separately by the helpers' defaults.
            storage_path = transcript_asset['storage_path'].replace('assets/', '')
            tmp_caption_filename = self._convert_sub_caption_to_srt(storage_path)

            if not tmp_caption_filename:
                continue

            if self._should_execute_updates:
                self._update_previous_sub_file(tmp_caption_filename, storage_path)

            if self._should_remove_files:
                os.remove(tmp_caption_filename)

def main():
    """Parse the command line, build a CaptionFixer from it and run the conversion."""
    options = create_parser().parse_args()

    # Map each command-line option onto the matching constructor keyword.
    fixer_settings = {
        'should_execute_updates': options.execute,
        'credentials_filename': options.config,
        'json_captions_file': options.captions_info,
        'output_dir': options.output_dir,
        'should_remove_files': options.remove_files,
    }
    CaptionFixer(**fixer_settings).execute()

# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
	#!/usr/local/bin/python3

	import boto3
	import json
	import logging
	import subprocess as sp
	import argparse
	import os

	def create_parser():
	    """Build the command-line parser for the caption conversion script.

	    Returns:
	        An ``argparse.ArgumentParser`` exposing ``--config``,
	        ``--captions_info``, ``--execute``, ``--output_dir`` and
	        ``--remove_files`` (with short forms ``-c``, ``-i``, ``-e``, ``-o``,
	        ``-r``).
	    """
	    parser = argparse.ArgumentParser(
	        description='Use ffmpeg to convert .sub caption files to .srt files, and upload to s3'
	    )
	    parser.add_argument(
	        '-c', '--config',
	        help='JSON Configuration file with s3 access keys',
	        default='config.json'
	    )
	    parser.add_argument(
	        '-i', '--captions_info',
	        help='JSON file containing caption key/filenames on s3',
	        default='captions_info.json'
	    )
	    parser.add_argument(
	        '-e', '--execute',
	        help='Set this flag to actually update the files on s3. If this is not set, s3 files will NOT be updated',
	        action='store_true'
	    )
	    parser.add_argument(
	        '-o', '--output_dir',
	        help='The output directory to store the converted caption files. Directory must exist',
	        default='/tmp'
	    )
	    parser.add_argument(
	        '-r', '--remove_files',
	        help='Set this flag to remove the converted caption files from your system',
	        action='store_true'
	    )

	    return parser

	class CaptionFixer:
	    """Convert caption files referenced on s3 with ffmpeg and optionally re-upload them.

	    Construction loads the credentials file and the caption list, then builds
	    the boto3 s3 client.  ``execute`` walks the caption list, converts each
	    file into ``output_dir`` and, depending on the flags, uploads the result
	    back to its original key and/or deletes the local copy.
	    """

	    # Must match the keys _create_s3_client reads from the credentials dict.
	    _REQUIRED_CREDENTIAL_KEYS = ('aws_access_key_id', 'aws_secret_access_key')
	    # Keys every caption record needs before it is accepted.
	    _REQUIRED_ASSET_KEYS = ('filename', 'storage_path')

	    def __init__(
	        self,
	        should_execute_updates=False,
	        credentials_filename='config.json',
	        json_captions_file='captions_info.json',
	        s3_url='https://s3.amazonaws.com/video-api-prod',
	        output_dir='/tmp',
	        should_remove_files=False
	    ):
	        """Load configuration and prepare the s3 client.

	        Args:
	            should_execute_updates: when true, converted files are uploaded to s3.
	            credentials_filename: JSON file holding ``aws_access_key_id`` and
	                ``aws_secret_access_key``.
	            json_captions_file: file with one JSON caption record per line.
	            s3_url: base URL the caption download URLs are built from.
	            output_dir: directory the converted files are written to.
	            should_remove_files: when true, converted files are deleted after
	                processing.

	        Raises:
	            ValueError: if the credentials file lacks a required key.
	        """
	        credentials = self._validate_and_load_credentials(credentials_filename)
	        transcript_assets_info = self._validate_and_load_transcript_assets(json_captions_file)
	        self._credentials = credentials
	        self._transcript_assets_info = transcript_assets_info
	        self._S3Client = self._create_s3_client(credentials)
	        self._should_execute_updates = should_execute_updates
	        self._url = s3_url
	        self._output_captions_dir = output_dir
	        self._should_remove_files = should_remove_files

	    def _create_s3_client(self, credentials):
	        """Return a boto3 s3 client built from *credentials*, or None on failure."""
	        try:
	            return boto3.client(
	                's3',
	                aws_access_key_id=credentials['aws_access_key_id'],
	                aws_secret_access_key=credentials['aws_secret_access_key'],
	            )
	        except Exception:
	            # Best effort: the failure is logged and later uploads are refused
	            # by _update_previous_sub_file because the client is None.
	            logging.exception('Could not initialize s3 boto client')
	            return None

	    def _validate_and_load_credentials(self, credentials_filename):
	        """Read the JSON credentials file and check it has the required keys.

	        Returns:
	            The credentials as a dict.

	        Raises:
	            ValueError: if any required key is absent.
	        """
	        with open(credentials_filename) as f:
	            credentials = dict(json.load(f))

	        missing_keys = [key for key in self._REQUIRED_CREDENTIAL_KEYS if key not in credentials]
	        if missing_keys:
	            message = 'Must have required keys in config file {}; missing {}'.format(
	                list(self._REQUIRED_CREDENTIAL_KEYS), missing_keys
	            )
	            logging.error(message)
	            raise ValueError(message)

	        return credentials

	    def _validate_and_load_transcript_assets(self, json_captions_file):
	        """Read caption records, one JSON document per line.

	        Blank lines are ignored.  Records lacking ``filename`` or
	        ``storage_path`` are logged with their line number and skipped.

	        Returns:
	            The list of accepted records, in file order.
	        """
	        transcript_assets = []

	        with open(json_captions_file) as f:
	            for line_number, line in enumerate(f, start=1):
	                if not line.strip():
	                    continue

	                transcript_asset = json.loads(line)
	                missing_keys = [key for key in self._REQUIRED_ASSET_KEYS if key not in transcript_asset]
	                if missing_keys:
	                    # .get(): the record being rejected may lack 'id' as well.
	                    logging.error(
	                        'Transcript asset %s missing required keys %s. Line number %s',
	                        transcript_asset.get('id'), missing_keys, line_number
	                    )
	                    continue

	                transcript_assets.append(transcript_asset)

	        return transcript_assets

	    def _convert_sub_caption_to_srt(self, storage_path, bucket='assets'):
	        """Run ffmpeg on the caption at *storage_path* and store the result locally.

	        The input is ``<s3_url>/<bucket>/<storage_path>``; the output file is
	        placed in the output directory, named after *storage_path* with ``/``
	        replaced by ``-``.

	        Returns:
	            The output filename, or None when ffmpeg could not be started or
	            exited with a non-zero status.
	        """
	        url = '{}/{}/{}'.format(self._url, bucket, storage_path)
	        tmp_caption_filename = os.path.join(
	            self._output_captions_dir, storage_path.replace('/', '-')
	        )
	        command = ['ffmpeg', '-i', url, tmp_caption_filename]

	        try:
	            with sp.Popen(command, stdout=sp.PIPE) as process:
	                output, _ = process.communicate()
	        except Exception:
	            logging.exception('Could not convert caption file %s', storage_path)
	            return None

	        # A started-but-failed ffmpeg must not be reported as a conversion.
	        if process.returncode != 0:
	            logging.error(
	                'Could not convert caption file %s (ffmpeg exit status %s)',
	                storage_path, process.returncode
	            )
	            return None

	        logging.info('Converted caption file %s. Stored at %s', storage_path, tmp_caption_filename)
	        logging.info(output)
	        return tmp_caption_filename

	    def _update_previous_sub_file(self, tmp_caption_filename, storage_path, bucket='assets'):
	        """Upload the converted file to s3 under *storage_path*.

	        Returns:
	            True when the upload call completed, False when the client is
	            unavailable, the file cannot be read, or the upload raised.
	        """
	        if self._S3Client is None:
	            logging.error('Could not update caption file %s: no s3 client', storage_path)
	            return False

	        try:
	            with open(tmp_caption_filename, 'rb') as f:
	                caption_bytes = f.read()
	            logging.info(caption_bytes)
	            # Send the bytes, not the file handle: the handle is at EOF after
	            # being read for logging, which would upload an empty object.
	            self._S3Client.put_object(Body=caption_bytes, Bucket=bucket, Key=storage_path)
	        except Exception:
	            logging.exception('Could not update caption file %s', storage_path)
	            return False

	        return True

	    def execute(self):
	        """Convert every loaded caption, then upload and/or delete it as configured."""
	        for transcript_asset in self._transcript_assets_info:
	            # Removes every occurrence of 'assets/' from the stored path; the
	            # bucket name is supplied separately by the helpers' defaults.
	            storage_path = transcript_asset['storage_path'].replace('assets/', '')
	            tmp_caption_filename = self._convert_sub_caption_to_srt(storage_path)

	            if not tmp_caption_filename:
	                continue

	            if self._should_execute_updates:
	                self._update_previous_sub_file(tmp_caption_filename, storage_path)

	            if self._should_remove_files:
	                os.remove(tmp_caption_filename)

	def main():
	    """Parse the command line, build a CaptionFixer from it and run the conversion."""
	    parser = create_parser()
	    args = parser.parse_args()
	    fixer = CaptionFixer(
	        should_execute_updates=args.execute,
	        credentials_filename=args.config,
	        json_captions_file=args.captions_info,
	        output_dir=args.output_dir,
	        should_remove_files=args.remove_files
	    )

	    fixer.execute()

	# Run the conversion only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	    main()