# Source: MadameMinty/syncabook2srt.py (secret gist)

## syncabook2srt.py
import os
from glob import glob
import xml.etree.ElementTree as ET
from subprocess import call

# --- Configuration ----------------------------------------------------------
# Book folder. The script changes into it and globs audio/, smil/ and
# sync_text/ relative to it.
WORKDIR = r"D:\syncabook\book"

# ffmpeg executable path.
FFMPEG = "ffmpeg"

# Cover image handed to ffmpeg as the first input.
COVER = "cover.png"

# Audio codec followed by its options, separated by spaces.
# Set to None to copy the audio stream instead of re-encoding it.
ACODEC = "libopus -b:a 60k"

# Video codec followed by its options, separated by spaces. Alternatives:
#   "libx264 -preset veryslow -tune stillimage -crf 44"  -> slow, pretty, and compatible
#   "libx264 -preset veryfast -crf 51"                   -> fast and compatible
VCODEC = "hevc_nvenc -preset fast -qp 51"  # covers go brrr


def glob_files() -> list[tuple[str, str, str]]:
    """Collect (audio, SMIL, XHTML) file triples from the current directory.

    Globs ``audio/*.*``, ``smil/*.smil`` and ``sync_text/*.xhtml`` relative to
    the current working directory and prints how many files of each kind were
    found.

    Each listing is sorted before the three are zipped together. ``glob``
    gives no ordering guarantee, and the triples are built purely by
    position, so unsorted listings could pair an audio file with the wrong
    SMIL/XHTML file. This assumes the files of one chapter sort to the same
    position in all three folders (e.g. they share a base name).

    Returns:
        A list of ``(audio_file, smil_file, xhtml_file)`` path tuples, or an
        empty list (after printing an error) when the three counts differ.
    """
    files_audio = sorted(glob('audio/*.*'))
    files_smil = sorted(glob('smil/*.smil'))
    files_xhtml = sorted(glob('sync_text/*.xhtml'))

    print('Audio files', len(files_audio))
    print(' SMIL files', len(files_smil))
    print('XHTML files', len(files_xhtml))

    if not len(files_audio) == len(files_smil) == len(files_xhtml):
        print("\x1b[41mERROR\x1b[0m Number of files doesn't match.")
        return []

    print('All sets match.')
    return list(zip(files_audio, files_smil, files_xhtml))


def srt(name, smil_file, xhtml_file):
    # Parse the .smil file
    smil_tree = ET.parse(smil_file)
    smil_root = smil_tree.getroot()

    # Parse the .xhtml file
    xhtml_tree = ET.parse(xhtml_file)
    xhtml_root = xhtml_tree.getroot()

    # Create a dictionary to store the text references
    text_references = {}

    # Find the text references in the .xhtml file
    for span in xhtml_root.iter('{http://www.w3.org/1999/xhtml}span'):
        span_id = span.get('id')
        text = span.text
        text_references[span_id] = text

    # Create a list to store the subtitle entries
    subtitles = []

    # Iterate through the par elements in the .smil file and create subtitle entries
    for par in smil_root.iter('{http://www.w3.org/ns/SMIL}par'):
        # par_id = par.get('id')
        text_src = par.find('{http://www.w3.org/ns/SMIL}text').get('src')
        audio = par.find('{http://www.w3.org/ns/SMIL}audio')
        clip_begin = audio.get('clipBegin')
        clip_end = audio.get('clipEnd')

        text_id = text_src.split('#')[-1]
        text = text_references.get(text_id)

        subtitle = {
            # 'id': par_id,
            'start_time': clip_begin,
            'end_time': clip_end,
            'text': text
        }

        subtitles.append(subtitle)

    # Generate the SRT content
    srt_content = ''
    for i, subtitle in enumerate(subtitles, start=1):
        srt_content += f'{i}\n'
        srt_content += f'{subtitle["start_time"].replace(".", ",").zfill(12)} --> {subtitle["end_time"].replace(".", ",").zfill(12)}\n'
        srt_content += f'{subtitle["text"]}\n\n'

    # Write the SRT content to a file
    srt_file = os.path.join('out', f'{name}.srt')
    with open(srt_file, 'w', encoding='utf-8') as file:
        file.write(srt_content)


def mux(name, audio_file):
    """Run ffmpeg to combine the cover image and one audio file into an MKV.

    Args:
        name: Base name (no extension) of the output; the result is written
            to ``out/<name>.mkv``.
        audio_file: Path of the audio file passed to ffmpeg as second input.

    The audio codec arguments come from ``ACODEC`` (``copy`` when it is
    unset) and the video codec arguments from ``VCODEC`` (a fast libx264
    preset when it is unset). The ffmpeg exit status is not inspected.
    """
    audio_args = ['copy'] if not ACODEC else ACODEC.split()
    if VCODEC:
        video_args = VCODEC.split()
    else:
        video_args = ['libx264', '-preset', 'veryfast', '-crf', '51']

    # Input side: overwrite without asking, the cover first, then the audio.
    command = [FFMPEG, '-y', '-r', '1', '-loop', '1', '-i', COVER,
               '-i', audio_file, '-shortest']
    # Output side: codec selections followed by the target file.
    command += ['-c:a', *audio_args]
    command += ['-c:v', *video_args]
    command.append(os.path.join('out', f'{name}.mkv'))

    call(command)


# main: convert every matched (audio, SMIL, XHTML) triple found in WORKDIR
os.chdir(WORKDIR)
files = glob_files()
for audio_file, smil_file, xhtml_file in files:
    # Created inside the loop, so no folder appears when nothing matched.
    os.makedirs('out', exist_ok=True)
    # Outputs are named after the audio file, minus directory and extension.
    name = os.path.splitext(os.path.basename(audio_file))[0]
    srt(name, smil_file, xhtml_file)
    mux(name, audio_file)
	import os
	from glob import glob
	import xml.etree.ElementTree as ET
	from subprocess import call

	# --- Configuration ----------------------------------------------------------
	# Book folder. The script changes into it and globs audio/, smil/ and
	# sync_text/ relative to it.
	WORKDIR = r"D:\syncabook\book"

	# ffmpeg executable path.
	FFMPEG = "ffmpeg"

	# Cover image handed to ffmpeg as the first input.
	COVER = "cover.png"

	# Audio codec followed by its options, separated by spaces.
	# Set to None to copy the audio stream instead of re-encoding it.
	ACODEC = "libopus -b:a 60k"

	# Video codec followed by its options, separated by spaces. Alternatives:
	#   "libx264 -preset veryslow -tune stillimage -crf 44"  -> slow, pretty, and compatible
	#   "libx264 -preset veryfast -crf 51"                   -> fast and compatible
	VCODEC = "hevc_nvenc -preset fast -qp 51"  # covers go brrr


	def glob_files() -> list[tuple[str, str, str]]:
	    """Collect (audio, SMIL, XHTML) file triples from the current directory.

	    Globs ``audio/*.*``, ``smil/*.smil`` and ``sync_text/*.xhtml`` relative to
	    the current working directory and prints how many files of each kind were
	    found.

	    Each listing is sorted before the three are zipped together. ``glob``
	    gives no ordering guarantee, and the triples are built purely by
	    position, so unsorted listings could pair an audio file with the wrong
	    SMIL/XHTML file. This assumes the files of one chapter sort to the same
	    position in all three folders (e.g. they share a base name).

	    Returns:
	        A list of ``(audio_file, smil_file, xhtml_file)`` path tuples, or an
	        empty list (after printing an error) when the three counts differ.
	    """
	    # The pattern must be 'audio/*.*'; 'audio/.' would only match the
	    # directory itself.
	    files_audio = sorted(glob('audio/*.*'))
	    files_smil = sorted(glob('smil/*.smil'))
	    files_xhtml = sorted(glob('sync_text/*.xhtml'))

	    print('Audio files', len(files_audio))
	    print(' SMIL files', len(files_smil))
	    print('XHTML files', len(files_xhtml))

	    if not len(files_audio) == len(files_smil) == len(files_xhtml):
	        print("\x1b[41mERROR\x1b[0m Number of files doesn't match.")
	        return []

	    print('All sets match.')
	    return list(zip(files_audio, files_smil, files_xhtml))


	def srt(name, smil_file, xhtml_file):
	# Parse the .smil file
	smil_tree = ET.parse(smil_file)
	smil_root = smil_tree.getroot()

	# Parse the .xhtml file
	xhtml_tree = ET.parse(xhtml_file)
	xhtml_root = xhtml_tree.getroot()

	# Create a dictionary to store the text references
	text_references = {}

	# Find the text references in the .xhtml file
	for span in xhtml_root.iter('{http://www.w3.org/1999/xhtml}span'):
	span_id = span.get('id')
	text = span.text
	text_references[span_id] = text

	# Create a list to store the subtitle entries
	subtitles = []

	# Iterate through the par elements in the .smil file and create subtitle entries
	for par in smil_root.iter('{http://www.w3.org/ns/SMIL}par'):
	# par_id = par.get('id')
	text_src = par.find('{http://www.w3.org/ns/SMIL}text').get('src')
	audio = par.find('{http://www.w3.org/ns/SMIL}audio')
	clip_begin = audio.get('clipBegin')
	clip_end = audio.get('clipEnd')

	text_id = text_src.split('#')[-1]
	text = text_references.get(text_id)

	subtitle = {
	# 'id': par_id,
	'start_time': clip_begin,
	'end_time': clip_end,
	'text': text
	}

	subtitles.append(subtitle)

	# Generate the SRT content
	srt_content = ''
	for i, subtitle in enumerate(subtitles, start=1):
	srt_content += f'{i}\n'
	srt_content += f'{subtitle["start_time"].replace(".", ",").zfill(12)} --> {subtitle["end_time"].replace(".", ",").zfill(12)}\n'
	srt_content += f'{subtitle["text"]}\n\n'

	# Write the SRT content to a file
	srt_file = os.path.join('out', f'{name}.srt')
	with open(srt_file, 'w', encoding='utf-8') as file:
	file.write(srt_content)


	def mux(name, audio_file):
	    """Run ffmpeg to combine the cover image and one audio file into an MKV.

	    Args:
	        name: Base name (no extension) of the output; the result is written
	            to ``out/<name>.mkv``.
	        audio_file: Path of the audio file passed to ffmpeg as second input.

	    The audio codec arguments come from ``ACODEC`` (``copy`` when it is
	    unset) and the video codec arguments from ``VCODEC`` (a fast libx264
	    preset when it is unset). The ffmpeg exit status is not inspected.
	    """
	    audio_args = ['copy'] if not ACODEC else ACODEC.split()
	    if VCODEC:
	        video_args = VCODEC.split()
	    else:
	        video_args = ['libx264', '-preset', 'veryfast', '-crf', '51']

	    # Input side: overwrite without asking, the cover first, then the audio.
	    command = [FFMPEG, '-y', '-r', '1', '-loop', '1', '-i', COVER,
	               '-i', audio_file, '-shortest']
	    # Output side: codec selections followed by the target file.
	    command += ['-c:a', *audio_args]
	    command += ['-c:v', *video_args]
	    command.append(os.path.join('out', f'{name}.mkv'))

	    call(command)


	# main: convert every matched (audio, SMIL, XHTML) triple found in WORKDIR
	os.chdir(WORKDIR)
	files = glob_files()
	for audio_file, smil_file, xhtml_file in files:
	    # Created inside the loop, so no folder appears when nothing matched.
	    os.makedirs('out', exist_ok=True)
	    # Outputs are named after the audio file, minus directory and extension.
	    name = os.path.splitext(os.path.basename(audio_file))[0]
	    srt(name, smil_file, xhtml_file)
	    mux(name, audio_file)