# BrianZbr/main.py

## main.py
import argparse
import base64
import json
import os
import subprocess
from xml.dom.minidom import parse

import requests

def get_image_from_xml(nodelist):
    '''Concatenate the character data of every text node in nodelist.

    Nodes whose nodeType is not TEXT_NODE are skipped. Returns an empty
    string when nodelist contains no text nodes.
    '''
    text_parts = (
        child.data for child in nodelist if child.nodeType == child.TEXT_NODE
    )
    return ''.join(text_parts)

def get_api_key_from_env():
    '''Return the Google Vision API key stored in GV_API_KEY.

    If GV_API_KEY is not present in the environment, the optional 'dotenv'
    module is tried so that a .env file can supply it; a message is printed
    when that module is not installed.

    Returns:
        The non-empty value of the GV_API_KEY environment variable.

    Raises:
        ValueError: if GV_API_KEY is still unset or empty afterwards.
    '''
    if 'GV_API_KEY' not in os.environ:
        try:
            from dotenv import load_dotenv
        except ModuleNotFoundError:
            print("Could not import 'dotenv' module!")
        else:
            load_dotenv()

    api_key = os.getenv('GV_API_KEY')

    # This check used to sit after the return statement and was never reached,
    # so a missing key was silently returned as None.
    if not api_key:
        raise ValueError("Valid API key could not be found! You may either: 1) Provide an API key via command argument, " +
              "2) set the environment variable GV_API_KEY, or 3) install 'dotenv' module to read a .env file.")

    return api_key

def extract_from_sub(inputfile,
                     forcedonly=False):
    '''Run subp2png on inputfile.

    Args:
        inputfile: path to a VOBsub file; must end in '.sub' and exist.
        forcedonly: when true, pass '--forced' to subp2png.

    Raises:
        ValueError: if inputfile does not end in '.sub' or does not exist.
        FileNotFoundError: if the subp2png executable cannot be found.

    The exit status of subp2png is not checked.
    '''
    if not inputfile.endswith('.sub'):
        raise ValueError("Please supply the path/filename of a .sub input file.")
    if not os.path.exists(inputfile):
        raise ValueError(f"Input file {inputfile} does not exist in working directory or specified path.")

    print("Processing file with subp2png, please be patient...")

    # Pass an argument list instead of a shell string so that paths containing
    # spaces or shell metacharacters are handed to subp2png unchanged.
    command = ["subp2png", "-n"]
    if forcedonly:
        command.append("--forced")
    command.append(inputfile)

    # stdout is discarded, as the former "> /dev/null" redirection did.
    subprocess.call(command, stdout=subprocess.DEVNULL)

def gv_ocr_image(inputfile, api_key):
    '''Fetch the text found in an image with the Google Vision API.

    Args:
        inputfile: path of the image file to send.
        api_key: Google Vision API key, passed as the 'key' query parameter.

    Returns:
        The 'description' of the first text annotation, or an empty string
        when the response contains no text annotations.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        RuntimeError: if the per-image response carries an 'error' entry.
    '''
    with open(inputfile, 'rb') as img_file:
        img_string = base64.b64encode(img_file.read())

    payload = {
        'requests': [{
            'image': {
                'content': img_string.decode()
            },
            'features': [{
                'type': 'TEXT_DETECTION',
                'maxResults': 1
            }]
        }]
    }
    response = requests.post(url='https://vision.googleapis.com/v1/images:annotate',
                             params={'key': api_key},
                             headers={'Content-Type': 'application/json'},
                             data=json.dumps(payload))
    response.raise_for_status()

    # Parse the body as JSON. It was previously passed to eval(), which runs
    # remote content as Python and fails on JSON true/false/null.
    result = json.loads(response.content.decode())['responses'][0]
    if 'error' in result:
        raise RuntimeError(f"Google Vision API error for {inputfile}: {result['error']}")

    annotations = result.get('textAnnotations')
    if not annotations:
        # No text detected in this image.
        return ''
    return annotations[0]['description']

def main():
    '''Command-line entry point: convert a VOBsub subtitle file to .srt.

    Steps:
      1. Parse the command line and resolve the API key (the --api_key
         argument wins over get_api_key_from_env()).
      2. Derive the .xml and .srt names from the input's base name; reuse an
         existing .xml or create it with extract_from_sub().
      3. Ask for confirmation, then OCR the image of every <subtitle> that
         has both 'start' and 'stop' attributes with gv_ocr_image().
      4. Write the numbered entries to the .srt file as UTF-8.

    Raises:
        SystemExit: when the user does not answer 'Yes', or (via argparse)
            on bad arguments or a missing API key.
    '''
    parser = argparse.ArgumentParser(description='Convert VOBsub to .srt using Google Vision API')
    parser.add_argument('input_filename', help='Name of input file in VOBsub format (can be the .sub or .idx but both must be '+
                                           'present with same base filename)')
    parser.add_argument('--api_key', default='', help='Google Vision API key (default: looks for GV_API_KEY, will set from in .env file if not set')

    args = parser.parse_args()

    api_key = args.api_key or get_api_key_from_env()
    if not api_key:
        parser.error("No API key available: pass --api_key or set GV_API_KEY.")

    # Swap only the real extension. str.replace() would rewrite any ".sub" in
    # the path and leave a ".idx" input unchanged, so the .idx would be parsed
    # as XML and reused as the output name.
    base_name, extension = os.path.splitext(args.input_filename)
    if extension.lower() == ".idx":
        sub_file = base_name + ".sub"
    else:
        sub_file = args.input_filename
    xml_file = base_name + ".xml"
    srt_filename = base_name + ".srt"

    if os.path.exists(xml_file):
        print(f"Found {xml_file}")
    else:
        extract_from_sub(sub_file)

    dom = parse(xml_file)
    subtitles = dom.getElementsByTagName("subtitle")

    user_response = input(f"Query Google Vision API on {len(subtitles)} images? Enter 'Yes' to proceed!: ")
    if user_response != "Yes":
        print("Aborting!")
        # Same effect as the site-provided exit(), without depending on it.
        raise SystemExit(0)

    entries = []
    for subtitle in subtitles:
        # Entries without both timestamps cannot be placed in an .srt file.
        if not (subtitle.hasAttribute("start") and subtitle.hasAttribute("stop")):
            continue
        image = get_image_from_xml(subtitle.getElementsByTagName("image")[0].childNodes)
        image_text = gv_ocr_image(image, api_key)
        # Timestamps are written with ',' in place of '.'.
        start = subtitle.getAttribute('start').replace('.', ',')
        stop = subtitle.getAttribute('stop').replace('.', ',')
        entries.append(f"{len(entries) + 1}\n"
                       f"{start} --> {stop}\n"
                       f"{image_text}\n\n")

    # Explicit UTF-8 so non-ASCII OCR text does not depend on the locale.
    with open(srt_filename, "w", encoding="utf-8") as srt_file:
        srt_file.write("".join(entries))

# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
	import os
	import argparse
	import requests
	import json
	import base64
	from xml.dom.minidom import parse

	def get_image_from_xml(nodelist):
	rc = []
	for node in nodelist:
	if node.nodeType == node.TEXT_NODE:
	rc.append(node.data)
	return ''.join(rc)

	def get_api_key_from_env():
	if not 'GV_API_KEY' in os.environ:
	try:
	from dotenv import load_dotenv
	load_dotenv()
	except ModuleNotFoundError:
	print("Could not import 'dotenv' module!")

	return os.getenv('GV_API_KEY')


	if not api_key:
	raise ValueError("Valid API key could not be found! You may either: 1) Provide an API key via command argument, " +
	"2) set the environment variable GV_API_KEY, or 3) install 'dotenv' module to read a .env file.")

	def extract_from_sub(inputfile,
	forcedonly=False):
	'''Run subp2png on inputfile'''
	if not (inputfile.endswith('.sub')):
	raise ValueError("Please supply the path/filename of a .sub input file.")
	elif not os.path.exists(inputfile):
	raise ValueError(f"Input file {inputfile} does not exist in working directory or specified path.")

	print(f"Processing file with subp2png, please be patient...")
	subprocess.call("subp2png -n " + ("--forced " if forcedonly else "") + inputfile + " > /dev/null", shell=True)

	def gv_ocr_image(inputfile, api_key):
	'''fetch text from image with Google Vision API'''
	with open(inputfile, 'rb') as img_file:
	img_string = base64.b64encode(img_file.read())

	response = requests.post(url='https://vision.googleapis.com/v1/images:annotate',
	params={'key': api_key},
	headers={'Content-Type': 'application/json'},
	data=json.dumps({
	'requests': [{
	'image': {
	'content': img_string.decode()
	},
	'features': [{
	'type': 'TEXT_DETECTION',
	'maxResults': 1
	}]
	}]
	}))

	return eval(response.content.decode())['responses'][0]['textAnnotations'][0]['description']

	def main():
	parser = argparse.ArgumentParser(description='Convert VOBsub to .srt using Google Vision API')
	parser.add_argument('input_filename', help='Name of input file in VOBsub format (can be the .sub or .idx but both must be '+
	'present with same base filename)')
	parser.add_argument('--api_key', default='', help='Google Vision API key (default: looks for GV_API_KEY, will set from in .env file if not set')

	args = parser.parse_args()

	if args.api_key:
	api_key = args.api_key
	else:
	api_key = get_api_key_from_env()

	xml_file = args.input_filename.replace(".sub", ".xml")

	if os.path.exists(xml_file):
	print(f"Found {xml_file}")
	else:
	extract_from_sub(args.input_filename)

	dom = parse(xml_file)

	count = 1
	srt_outstring = ""
	subtitles = dom.getElementsByTagName("subtitle")

	user_response = input(f"Query Google Vision API on {len(subtitles)} images? Enter 'Yes' to proceed!: ")
	if not user_response == "Yes":
	print("Aborting!")
	exit()

	for subtitle in dom.getElementsByTagName("subtitle"):
	if not subtitle.attributes.__contains__("start") or not subtitle.attributes.__contains__("stop"):
	continue
	image = get_image_from_xml(subtitle.getElementsByTagName("image")[0].childNodes)
	image_text = gv_ocr_image(image, api_key)
	srt_outstring += f"{count}\n" \
	f"{subtitle.getAttribute('start').replace('.', ',')} --> " \
	f"{subtitle.getAttribute('stop').replace('.', ',')}\n" \
	f"{image_text}\n\n"
	count += 1

	srt_filename = args.input_filename.replace(".sub", ".srt")
	with open(srt_filename, "wt") as srt_file:
	srt_file.write(srt_outstring)

	if __name__ == '__main__':
	main()