ES-Alexander/ssa_encoder.py

## ssa_encoder.py
#!/usr/bin/env python3

def parse(file):
    ''' Generates encoded characters from file byte data.

    Encoding is suitable for embedded [Graphics] in SubStation Alpha files.
    See here for encoding specification and other details:
      http://www.tcax.org/docs/ass-specs.htm

    Bytes are split into groups of 6 bits, then 33 is added to each group
      (which ensures all encoded bytes are printable ascii characters, and not
      lower-case).
    Most bytes are handled in groups of 3, since 3*8 = 24 bits = 4 groups of 6.
    If the file length is not a multiple of 3 bytes the remaining one or two
      bytes are left-shifted, with zero-bits added on the right to get an even
      multiple of 6 bits, which can then be split and encoded normally:
        1 byte  ->  8 bits + 4 padding bits = 12 bits = 2 characters
        2 bytes -> 16 bits + 2 padding bits = 18 bits = 3 characters

    'file' is a binary file-like object; it is consumed via file.read(3), and
      any chunk shorter than 3 bytes is encoded as the trailing remainder.

    Yields one encoded character (a length-1 str) at a time.

    '''
    encoded_bits = 6
    mask = (1 << encoded_bits) - 1
    while (data := file.read(3)):
        data_bits = 8 * len(data)
        # round up to a whole number of 6-bit groups
        encoded_characters = -(-data_bits // encoded_bits)
        # zero-pad on the right so the groups line up with the first data bit
        #  (previously a lone byte was shifted by 6 instead of 4, which pushed
        #  its two most-significant bits out of the encoded range)
        padding = encoded_characters * encoded_bits - data_bits
        joined = int.from_bytes(data, 'big') << padding
        # yield one encoded character at a time, most-significant group first
        for split in range((encoded_characters - 1) * encoded_bits,
                           -1, -encoded_bits):
            yield chr(((joined >> split) & mask) + 33)

def to_lines(parser):
    ''' Yields the characters from 'parser', broken into 80-character lines.

    A newline is yielded before every character that would overflow the
      current line, and one final newline is always yielded once 'parser' is
      exhausted (even if it produced no characters at all).

    '''
    column = 0
    for character in parser:
        if column == 80:
            # current line is full -> start a fresh one
            yield '\n'
            column = 0
        yield character
        column += 1
    yield '\n'


if __name__ == '__main__':
    # Command-line entry point: encode one file and print it to stdout as an
    #  embedded-file section (header line, 'filename:' line, encoded lines).
    from pathlib import Path
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Advanced SubStation embedded file encoder')
    parser.add_argument('input_filename', type=Path)
    args = parser.parse_args()

    # Reject unsupported extensions with an explicit check: an assert would
    #  be stripped when running under 'python -O', silently accepting any file.
    valid_file_types = ('.bmp', '.jpg', '.gif', '.ico', '.wmf', '.ttf')
    if (file_type := args.input_filename.suffix) not in valid_file_types:
        parser.error(
            f'Unsupported {file_type = } - must be one of {valid_file_types}')

    # NOTE(review): the same '[Graphics]' header is printed for every accepted
    #  type, including '.ttf' - confirm that is what consumers expect for fonts.
    print('[Graphics]')
    print('filename:', args.input_filename.name)
    with open(args.input_filename, 'rb') as file:
        print(''.join(to_lines(parse(file))))
	#!/usr/bin/env python3

	def parse(file):
	    ''' Generates encoded characters from file byte data.

	    Encoding is suitable for embedded [Graphics] in SubStation Alpha files.
	    See here for encoding specification and other details:
	      http://www.tcax.org/docs/ass-specs.htm

	    Bytes are split into groups of 6 bits, then 33 is added to each group
	      (which ensures all encoded bytes are printable ascii characters, and
	      not lower-case).
	    Most bytes are handled in groups of 3, since 3*8 = 24 bits = 4 groups
	      of 6.
	    If the file length is not a multiple of 3 bytes the remaining one or two
	      bytes are left-shifted, with zero-bits added on the right to get an
	      even multiple of 6 bits, which can then be split and encoded normally:
	        1 byte  ->  8 bits + 4 padding bits = 12 bits = 2 characters
	        2 bytes -> 16 bits + 2 padding bits = 18 bits = 3 characters

	    'file' is a binary file-like object; it is consumed via file.read(3),
	      and any chunk shorter than 3 bytes is encoded as the trailing
	      remainder.

	    Yields one encoded character (a length-1 str) at a time.

	    '''
	    encoded_bits = 6
	    mask = (1 << encoded_bits) - 1
	    while (data := file.read(3)):
	        data_bits = 8 * len(data)
	        # round up to a whole number of 6-bit groups
	        encoded_characters = -(-data_bits // encoded_bits)
	        # zero-pad on the right so the groups line up with the first data
	        #  bit (a lone byte needs 4 padding bits, not 6, otherwise its two
	        #  most-significant bits fall outside the encoded range)
	        padding = encoded_characters * encoded_bits - data_bits
	        joined = int.from_bytes(data, 'big') << padding
	        # yield one encoded character at a time, most-significant first
	        for split in range((encoded_characters - 1) * encoded_bits,
	                           -1, -encoded_bits):
	            yield chr(((joined >> split) & mask) + 33)

	def to_lines(parser):
	    ''' Yields encoded characters in 80-character lines.

	    A newline is yielded before every character whose index is a non-zero
	      multiple of 80, and one final newline is always yielded once 'parser'
	      is exhausted (even if it produced no characters at all).

	    '''
	    for index, value in enumerate(parser):
	        if index and index % 80 == 0:
	            # previous line is full -> start a fresh one
	            yield '\n'
	        yield value
	    yield '\n'


	if __name__ == '__main__':
	    # Command-line entry point: encode one file and print it to stdout as
	    #  an embedded-file section (header line, 'filename:' line, encoded
	    #  lines).
	    from pathlib import Path
	    from argparse import ArgumentParser

	    parser = ArgumentParser(
	        description='Advanced SubStation embedded file encoder')
	    parser.add_argument('input_filename', type=Path)
	    args = parser.parse_args()

	    # Reject unsupported extensions with an explicit check: an assert would
	    #  be stripped when running under 'python -O', silently accepting any
	    #  file.
	    valid_file_types = ('.bmp', '.jpg', '.gif', '.ico', '.wmf', '.ttf')
	    if (file_type := args.input_filename.suffix) not in valid_file_types:
	        parser.error(
	            f'Unsupported {file_type = } - must be one of {valid_file_types}')

	    print('[Graphics]')
	    print('filename:', args.input_filename.name)
	    with open(args.input_filename, 'rb') as file:
	        print(''.join(to_lines(parse(file))))