Created
January 26, 2015 19:18
-
-
Save nielsmh/c498d6f5a320f5a08753 to your computer and use it in GitHub Desktop.
EBU STL (Tech3264) file dumper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from struct import Struct | |
import sys | |
from collections import namedtuple | |
import codecs | |
def pick_header_codec(cpn):
    """Return the decoder for the header's code page number.

    ``cpn`` is the code page field read from the file header (for
    example ``'850'``).  It may be passed as text or as the raw bytes
    that ``struct`` yields on Python 3; bytes are turned into text first
    because ``codecs.getdecoder`` only accepts a text codec name there.

    Returns a function ``decoder(data)`` giving ``(text, consumed)``.
    Raises ``LookupError`` if no codec of that name is known.
    """
    if isinstance(cpn, bytes):
        # Latin-1 never fails, so a corrupt field surfaces as the usual
        # LookupError from the codec registry rather than a decode error.
        cpn = cpn.decode('latin-1')
    return codecs.getdecoder(cpn)
class iso6937codec:
    """Minimal ISO 6937 decoder.

    Not a complete implementation, just good enough for basic testing:

    * ASCII bytes pass through unchanged.
    * A non-spacing diacritic byte (0xC1-0xCF, see ``_table``) followed by
      a printable ASCII character is emitted as that character plus the
      matching Unicode combining mark.  ISO 6937 puts the diacritic
      before the base letter, Unicode puts it after, hence the swap.
    * Every other byte >= 0x80 is shown as a literal ``\\xNN`` escape.
    """

    def __init__(self):
        # Diacritic byte value -> Unicode combining mark.  0xC9 and 0xCC
        # are intentionally absent; 0xCD (double acute) must be present or
        # the ogonek/caron entries after it shift onto the wrong marks.
        self._table = dict(zip(
            bytearray(b'\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xca\xcb\xcd\xce\xcf'),
            u'\u0300\u0301\u0302\u0303\u0304\u0306\u0307\u0308\u030a\u0327\u030b\u0328\u030c'
        ))

    def decode(self, str):
        """Decode ``str`` (bytes) and return ``(text, consumed)``.

        ``consumed`` is the number of input bytes processed, matching the
        convention of the decoders returned by ``codecs.getdecoder``.
        Text input is accepted too and is treated as Latin-1 bytes.
        """
        data = str if isinstance(str, bytes) else str.encode('latin-1')
        pieces = []
        pending = None  # diacritic byte still waiting for its base letter
        # bytearray yields integers on both Python 2 and Python 3.
        for code in bytearray(data):
            if pending is not None:
                if 0x20 <= code < 0x7f:
                    # Base character first, then the combining mark.
                    pieces.append(u'%c' % code)
                    pieces.append(self._table[pending])
                    pending = None
                    continue
                # No usable base follows: show the diacritic byte escaped
                # and handle the current byte normally below.
                pieces.append(u'\\x{:02x}'.format(pending))
                pending = None
            if code in self._table:
                pending = code
            elif code < 128:
                pieces.append(u'%c' % code)
            else:
                pieces.append(u'\\x{:02x}'.format(code))
        if pending is not None:
            # Input ended right after a diacritic byte.
            pieces.append(u'\\x{:02x}'.format(pending))
        return (u''.join(pieces), len(data))
def pick_sub_codec(cct):
    """Return a decoder for subtitle text fields, chosen by ``cct``.

    ``cct`` is the two-character subtitle encoding code from the header,
    given either as text or as the raw bytes ``struct`` yields on
    Python 3.  ``'00'`` selects the ISO 6937 decoder, ``'01'``..``'04'``
    select iso-8859-5/6/7/8, and any other value falls back to
    iso-8859-1.

    The returned function takes the raw text-field bytes and returns
    ``(text, consumed)``.  Before decoding it rewrites control bytes so
    they stay visible in the dump: 0x80/0x81 become ``/``, 0x82/0x83
    become ``_``, 0x84 becomes ``<``, 0x85 becomes ``>``, 0x8a becomes a
    newline, and every 0x8f byte is dropped.
    NOTE(review): these look like the Tech 3264 italics/underline/boxing
    on-off codes, line break and unused-space filler - confirm against
    the spec.
    """
    if isinstance(cct, bytes):
        # Normalise so the comparisons below also match a bytes field.
        cct = cct.decode('latin-1')

    if cct == '00':
        codec = iso6937codec().decode
    else:
        encodings = {
            '01': 'iso-8859-5',
            '02': 'iso-8859-6',
            '03': 'iso-8859-7',
            '04': 'iso-8859-8',
        }
        codec = codecs.getdecoder(encodings.get(cct, 'iso-8859-1'))

    # Build the 256-entry translation table by hand: string.maketrans is
    # gone on Python 3 and bytes.maketrans does not exist on Python 2.
    table = bytearray(range(256))
    for src, dst in zip(bytearray(b'\x80\x81\x82\x83\x84\x85\x8a'),
                        bytearray(b'//__<>\n')):
        table[src] = dst
    pretrans = bytes(table)

    return lambda s: codec(s.translate(pretrans, b'\x8f'))
# File header layout (1024 bytes).  Each piece below is one field of
# StlHeader; the descriptions follow the labels printed by the dump.
stlheaderparser = Struct(
    "3s"    # cpn: header codepage
    "8s"    # dfc: disk format code
    "c"     # dsc: display standard
    "2s"    # cct: subtitle encoding
    "2s"    # lc:  subtitle language
    "32s"   # opt: original programme title
    "32s"   # oet: original episode title
    "32s"   # tpt: translated programme title
    "32s"   # tet: translated episode title
    "32s"   # tn:  translator name
    "32s"   # tcd: translator contact
    "16s"   # slr: subtitle list reference code
    "6s"    # cd:  creation date
    "6s"    # rd:  revision date
    "2s"    # rn:  revision number
    "5s"    # tnb: total number of blocks
    "5s"    # tns: total number of subtitles
    "3s"    # tng: total number of subtitle groups
    "2s"    # mnc: max visible characters per row
    "2s"    # mnr: max visible rows
    "c"     # tcs: status TC
    "8s"    # tcp: start-of-programme TC
    "8s"    # tcf: first in-cue TC
    "c"     # tnd: total number of disks
    "c"     # dsn: disk number
    "3s"    # co:  country of origin
    "32s"   # pub: publisher
    "32s"   # en:  editor's name
    "32s"   # ecd: editor contact
    "75x"   # padding bytes, skipped (no field)
    "576s"  # uda: user-defined area
)
StlHeader = namedtuple("StlHeader", [
    "cpn", "dfc", "dsc", "cct", "lc",
    "opt", "oet", "tpt", "tet", "tn", "tcd", "slr",
    "cd", "rd", "rn", "tnb", "tns", "tng", "mnc", "mnr",
    "tcs", "tcp", "tcf", "tnd", "dsn", "co",
    "pub", "en", "ecd", "uda",
])

# Subtitle block layout (128 bytes, little-endian, no alignment padding).
stlblockparser = Struct(
    "<"
    "B"     # sgn: subtitle group number
    "H"     # sn:  number used to identify the block in warnings
    "B"     # ebn: extension block number
    "B"     # cs:  cumulative status
    "4B"    # tci_h, tci_m, tci_s, tci_f: printed as TCI=hh:mm:ss:ff
    "4B"    # tco_h, tco_m, tco_s, tco_f: printed as TCO=hh:mm:ss:ff
    "B"     # vp
    "B"     # jc
    "B"     # cf:  comment flag
    "112s"  # tf:  raw text field handed to the subtitle decoder
)
StlBlock = namedtuple("StlBlock", [
    "sgn", "sn", "ebn", "cs",
    "tci_h", "tci_m", "tci_s", "tci_f",
    "tco_h", "tco_m", "tco_s", "tco_f",
    "vp", "jc", "cf", "tf",
])
def _text(raw):
    """Decode a raw header field for display.

    Latin-1 maps every byte to a character, so a corrupt or non-ASCII
    field is shown as-is instead of aborting the dump with a decode error.
    """
    return raw.decode('latin-1')


def _warn(outf, message):
    """Print a diagnostic on stdout, keeping it in order with the dump.

    The dump itself may be going to stdout through a separate writer, so
    both sides are flushed around the print.
    """
    outf.flush()
    print(message)
    sys.stdout.flush()


def _dump_header(outf, header, headercodec):
    """Write every field of the parsed file header to ``outf``."""
    emit = outf.write
    emit("Header codepage: %s\n" % _text(header.cpn))
    emit("Disk format code: '%s'\n" % _text(header.dfc))
    emit("Display standard: %s\n" % _text(header.dsc))
    emit("Subtitle encoding: %s\n" % _text(header.cct))
    emit("Subtitle language: %s\n" % _text(header.lc))
    emit("Original pgm. title: '%s'\n" % headercodec(header.opt)[0])
    emit("Original eps. title: '%s'\n" % headercodec(header.oet)[0])
    emit("Tlated pgm. title: '%s'\n" % headercodec(header.tpt)[0])
    emit("Tlated eps. title: '%s'\n" % headercodec(header.tet)[0])
    emit("Translator name: '%s'\n" % headercodec(header.tn)[0])
    emit("Translator contact: '%s'\n" % headercodec(header.tcd)[0])
    emit("Subt.list ref. code: '%s'\n" % headercodec(header.slr)[0])
    emit("Creation date: %s\n" % _text(header.cd))
    emit("Revision date: %s\n" % _text(header.rd))
    emit("Revision number: %s\n" % _text(header.rn))
    emit("Total # blocks: %s\n" % _text(header.tnb))
    emit("Total # subtitles: %s\n" % _text(header.tns))
    emit("Total # sub. groups: %s\n" % _text(header.tng))
    emit("Max # vis. ch./row: %s\n" % _text(header.mnc))
    emit("Max # vis. rows: %s\n" % _text(header.mnr))
    emit("Status TC: %s\n" % _text(header.tcs))
    emit("Start-of-pgm TC: %s\n" % _text(header.tcp))
    emit("First in-cue TC: %s\n" % _text(header.tcf))
    emit("Disk #/total: %s/%s\n" % (_text(header.dsn), _text(header.tnd)))
    emit("Country of origin: %s\n" % _text(header.co))
    emit("Publisher: '%s'\n" % headercodec(header.pub)[0])
    emit("Editor's name: '%s'\n" % headercodec(header.en)[0])
    emit("Editor contact: '%s'\n" % headercodec(header.ecd)[0])
    emit("User-defined area:\n")
    # The user-defined area is arbitrary bytes; decode it explicitly so
    # non-ASCII content cannot make the UTF-8 writer fail.
    emit(_text(header.uda))
    emit('\n\n')


def _dump_block(outf, block, subcodec):
    """Write one subtitle block to ``outf`` and warn about unusual fields."""
    outf.write("BLOCK> SGN=%03d SN=%05d EBN=%03d CS=%03d VP=%03d JC=%03d CF=%d\n" % (
        block.sgn, block.sn, block.ebn, block.cs, block.vp, block.jc, block.cf))
    if block.sgn != 0:
        _warn(outf, "(%d) SGN: Non-zero subtitle group number!" % block.sn)
    if block.ebn != 255:
        _warn(outf, "(%d) EBN: Extension block!" % block.sn)
    if block.cs != 0:
        _warn(outf, "(%d) CS: Non-zero cumulative status!" % block.sn)
    if block.cf != 0:
        _warn(outf, "(%d) CF: Comment flag set!" % block.sn)
    outf.write(" TCI=%02d:%02d:%02d:%02d TCO=%02d:%02d:%02d:%02d\n" % (
        block.tci_h, block.tci_m, block.tci_s, block.tci_f,
        block.tco_h, block.tco_m, block.tco_s, block.tco_f))
    outf.write(subcodec(block.tf)[0])
    outf.write('\n\n')


# Usage: <script> INPUT.stl [OUTPUT]
# The dump is written to OUTPUT as UTF-8, or to stdout when OUTPUT is
# missing or cannot be opened.
inf = open(sys.argv[1], 'rb')
try:
    try:
        # Binary mode: the UTF-8 writer below produces bytes itself.
        rawout = open(sys.argv[2], 'wb')
        owns_output = True
    except (IndexError, EnvironmentError):
        # No output argument, or the file could not be opened: fall back
        # to stdout (its byte-level buffer where Python provides one).
        rawout = getattr(sys.stdout, 'buffer', sys.stdout)
        owns_output = False
    outf = codecs.getwriter('utf8')(rawout, 'strict')
    try:
        rawheader = inf.read(stlheaderparser.size)
        header = StlHeader._make(stlheaderparser.unpack(rawheader))
        headercodec = pick_header_codec(header.cpn)
        _dump_header(outf, header, headercodec)

        subcodec = pick_sub_codec(header.cct)
        while True:
            rawblock = inf.read(stlblockparser.size)
            if len(rawblock) < stlblockparser.size:
                # End of file (a trailing partial block is ignored).
                break
            block = StlBlock._make(stlblockparser.unpack(rawblock))
            _dump_block(outf, block, subcodec)
    finally:
        # Close only a file this script opened; stdout is just flushed.
        if owns_output:
            outf.close()
        else:
            outf.flush()
finally:
    inf.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment