Skip to content

Instantly share code, notes, and snippets.

@nielsmh
Created January 26, 2015 19:18
Show Gist options
  • Save nielsmh/c498d6f5a320f5a08753 to your computer and use it in GitHub Desktop.
Save nielsmh/c498d6f5a320f5a08753 to your computer and use it in GitHub Desktop.
EBU STL (Tech3264) file dumper
from struct import Struct
import sys
from collections import namedtuple
import codecs
def pick_header_codec(cpn):
    """Return a decoder function for the header's code page field.

    cpn is the code page identifier as stored in the file header
    (presumably something like '437' or '850' -- confirm against the
    EBU Tech 3264 spec). It may be given as text or as the raw bytes
    that struct unpacking yields.

    Returns a codecs-style decoder: a callable taking bytes and returning
    a (text, bytes_consumed) tuple.

    Raises LookupError if no codec is registered under that name.
    """
    # struct's "s" fields are bytes on Python 3; codec lookup needs text.
    if isinstance(cpn, bytes):
        cpn = cpn.decode('ascii', 'replace')
    # Tolerate padding around the identifier.
    return codecs.getdecoder(cpn.strip())
class iso6937codec:
    """Minimal ISO 6937 decoder.

    Not a complete implementation, just good enough for basic testing:
    7-bit bytes pass through unchanged, the non-spacing diacritic bytes are
    turned into Unicode combining marks, and every other 8-bit byte is
    rendered as a literal ``\\xNN`` escape so it stays visible in a dump.
    """

    # Diacritic lead bytes and the combining marks they stand for, in the
    # same order (grave, acute, circumflex, tilde, macron, breve, dot above,
    # diaeresis, ring, cedilla, double acute, ogonek, caron). Both sequences
    # must have the same length or zip() silently misaligns the tail.
    _DIACRITIC_BYTES = b'\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xca\xcb\xcd\xce\xcf'
    _COMBINING_MARKS = (u'\u0300\u0301\u0302\u0303\u0304\u0306\u0307\u0308'
                        u'\u030a\u0327\u030b\u0328\u030c')

    def __init__(self):
        # Keyed by integer byte value so lookups work on Python 2 and 3.
        self._table = dict(zip(bytearray(self._DIACRITIC_BYTES),
                               self._COMBINING_MARKS))

    @staticmethod
    def _escape(byte):
        """Render an undecodable byte as a visible ``\\xNN`` escape."""
        return u'\\x{:02x}'.format(byte)

    def decode(self, str, errors='strict'):
        """Decode a byte string; return ``(text, bytes_consumed)``.

        str is the encoded input (the parameter name is kept for backward
        compatibility). errors is accepted so the method matches the
        signature of codecs decoders; it is not used, because undecodable
        bytes are always escaped rather than raised.

        In ISO 6937 a diacritic byte precedes its base letter, whereas a
        Unicode combining mark follows it, so the mark is held back until
        the base character has been emitted. A diacritic byte that is not
        followed by a 7-bit character is escaped like any other 8-bit byte.
        """
        if not isinstance(str, (bytes, bytearray)):
            # Text input: map code points 0-255 back onto byte values.
            str = str.encode('latin-1')
        pieces = []
        pending = None  # diacritic byte still waiting for its base character
        for byte in bytearray(str):
            if byte < 0x80:
                # u'%c' yields a one-character text string on Python 2 and 3.
                pieces.append(u'%c' % byte)
                if pending is not None:
                    pieces.append(self._table[pending])
                    pending = None
                continue
            if pending is not None:
                # The previous diacritic never got a base character.
                pieces.append(self._escape(pending))
                pending = None
            if byte in self._table:
                pending = byte
            else:
                pieces.append(self._escape(byte))
        if pending is not None:
            pieces.append(self._escape(pending))
        # Decoders report how much input they consumed, not the output size.
        return (u''.join(pieces), len(str))
def pick_sub_codec(cct):
    """Return a decoder for subtitle text fields, chosen by the header's
    character code table value.

    cct is the two-character code from the header, as text or raw bytes:
    '00' selects the ISO 6937 decoder, '01'..'04' select ISO 8859-5/6/7/8,
    and anything else falls back to ISO 8859-1.

    The returned callable takes the raw text-field bytes and returns a
    codecs-style (text, bytes_consumed) tuple. Before decoding, control
    bytes are rewritten so they show up in the dump: 0x80 and 0x81 become
    '/', 0x82 and 0x83 become '_', 0x84 becomes '<', 0x85 becomes '>',
    0x8a becomes a newline, and every 0x8f byte is dropped (presumably
    italic/underline/boxing switches, line break and unused-space filler
    per EBU Tech 3264 -- confirm against the spec).
    """
    # struct's "s" fields are bytes on Python 3; compare as text.
    if isinstance(cct, bytes):
        cct = cct.decode('ascii', 'replace')

    iso8859_by_cct = {
        '01': 'iso-8859-5',
        '02': 'iso-8859-6',
        '03': 'iso-8859-7',
        '04': 'iso-8859-8',
    }
    if cct == '00':
        codec = iso6937codec().decode
    else:
        codec = codecs.getdecoder(iso8859_by_cct.get(cct, 'iso-8859-1'))

    # Build the 256-entry translation table by hand: string.maketrans only
    # exists on Python 2 and bytes.maketrans only on Python 3.
    table = bytearray(range(256))
    for src, dst in zip(bytearray(b'\x80\x81\x82\x83\x84\x85\x8a'),
                        bytearray(b'//__<>\n')):
        table[src] = dst
    pretrans = bytes(table)

    def decode_text_field(s):
        """Rewrite control bytes, strip 0x8f bytes, then decode."""
        return codec(s.translate(pretrans, b'\x8f'))

    return decode_text_field
# Binary layout of the file header. Every field is a fixed-width byte string
# or single char, so no byte-order prefix is needed. The descriptions follow
# the labels this script prints for each field.
stlheaderparser = Struct(
    "3s"    # cpn: header codepage
    "8s"    # dfc: disk format code
    "c"     # dsc: display standard
    "2s"    # cct: subtitle encoding
    "2s"    # lc:  subtitle language
    "32s"   # opt: original programme title
    "32s"   # oet: original episode title
    "32s"   # tpt: translated programme title
    "32s"   # tet: translated episode title
    "32s"   # tn:  translator name
    "32s"   # tcd: translator contact
    "16s"   # slr: subtitle list reference code
    "6s"    # cd:  creation date
    "6s"    # rd:  revision date
    "2s"    # rn:  revision number
    "5s"    # tnb: total number of blocks
    "5s"    # tns: total number of subtitles
    "3s"    # tng: total number of subtitle groups
    "2s"    # mnc: max visible characters per row
    "2s"    # mnr: max visible rows
    "c"     # tcs: timecode status
    "8s"    # tcp: start-of-programme timecode
    "8s"    # tcf: first in-cue timecode
    "c"     # tnd: total number of disks
    "c"     # dsn: disk sequence number
    "3s"    # co:  country of origin
    "32s"   # pub: publisher
    "32s"   # en:  editor's name
    "32s"   # ecd: editor contact
    "75x"   # 75 skipped bytes, not exposed as a field
    "576s"  # uda: user-defined area
)
StlHeader = namedtuple("StlHeader", [
    "cpn", "dfc", "dsc", "cct", "lc",
    "opt", "oet", "tpt", "tet", "tn", "tcd", "slr",
    "cd", "rd", "rn", "tnb", "tns", "tng", "mnc", "mnr",
    "tcs", "tcp", "tcf", "tnd", "dsn", "co",
    "pub", "en", "ecd", "uda",
])

# Binary layout of one subtitle block: little-endian, one 16-bit field,
# the rest single bytes, followed by a 112-byte text field.
stlblockparser = Struct(
    "<"
    "B"     # sgn: subtitle group number
    "H"     # sn:  subtitle number
    "B"     # ebn: extension block number
    "B"     # cs:  cumulative status
    "4B"    # tci_h, tci_m, tci_s, tci_f: in-cue timecode
    "4B"    # tco_h, tco_m, tco_s, tco_f: out-cue timecode
    "B"     # vp  (presumably vertical position -- confirm against the spec)
    "B"     # jc  (presumably justification code -- confirm against the spec)
    "B"     # cf:  comment flag
    "112s"  # tf:  text field
)
StlBlock = namedtuple("StlBlock", [
    "sgn", "sn", "ebn", "cs",
    "tci_h", "tci_m", "tci_s", "tci_f",
    "tco_h", "tco_m", "tco_s", "tco_f",
    "vp", "jc", "cf", "tf",
])
def _dump_header(outf, header, headercodec):
    """Write a human-readable listing of the parsed file header to outf."""

    def plain(raw):
        # Fixed-format fields: decode leniently so one stray byte cannot
        # abort the whole dump.
        return raw.decode('ascii', 'replace')

    def text(raw):
        # Free-text fields use the code page announced in the header.
        return headercodec(raw)[0]

    outf.write(u"Header codepage: %s\n" % plain(header.cpn))
    outf.write(u"Disk format code: '%s'\n" % plain(header.dfc))
    outf.write(u"Display standard: %s\n" % plain(header.dsc))
    outf.write(u"Subtitle encoding: %s\n" % plain(header.cct))
    outf.write(u"Subtitle language: %s\n" % plain(header.lc))
    outf.write(u"Original pgm. title: '%s'\n" % text(header.opt))
    outf.write(u"Original eps. title: '%s'\n" % text(header.oet))
    outf.write(u"Tlated pgm. title: '%s'\n" % text(header.tpt))
    outf.write(u"Tlated eps. title: '%s'\n" % text(header.tet))
    outf.write(u"Translator name: '%s'\n" % text(header.tn))
    outf.write(u"Translator contact: '%s'\n" % text(header.tcd))
    outf.write(u"Subt.list ref. code: '%s'\n" % text(header.slr))
    outf.write(u"Creation date: %s\n" % plain(header.cd))
    outf.write(u"Revision date: %s\n" % plain(header.rd))
    outf.write(u"Revision number: %s\n" % plain(header.rn))
    outf.write(u"Total # blocks: %s\n" % plain(header.tnb))
    outf.write(u"Total # subtitles: %s\n" % plain(header.tns))
    outf.write(u"Total # sub. groups: %s\n" % plain(header.tng))
    outf.write(u"Max # vis. ch./row: %s\n" % plain(header.mnc))
    outf.write(u"Max # vis. rows: %s\n" % plain(header.mnr))
    outf.write(u"Status TC: %s\n" % plain(header.tcs))
    outf.write(u"Start-of-pgm TC: %s\n" % plain(header.tcp))
    outf.write(u"First in-cue TC: %s\n" % plain(header.tcf))
    outf.write(u"Disk #/total: %s/%s\n" % (plain(header.dsn), plain(header.tnd)))
    outf.write(u"Country of origin: %s\n" % plain(header.co))
    outf.write(u"Publisher: '%s'\n" % text(header.pub))
    outf.write(u"Editor's name: '%s'\n" % text(header.en))
    outf.write(u"Editor contact: '%s'\n" % text(header.ecd))
    outf.write(u"User-defined area:\n")
    # The user-defined area is arbitrary bytes; writing it undecoded to a
    # text stream fails on any non-ASCII byte, so decode it leniently.
    outf.write(headercodec(header.uda, 'replace')[0])
    outf.write(u'\n\n')


def _dump_blocks(inf, outf, warn, subcodec):
    """Dump every complete subtitle block read from inf to outf.

    Notes about unusual field values are written to warn. Reading stops at
    the first short read, so a truncated trailing block is ignored.
    """
    while True:
        rawblock = inf.read(stlblockparser.size)
        if len(rawblock) < stlblockparser.size:
            break
        block = StlBlock._make(stlblockparser.unpack(rawblock))
        outf.write(u"BLOCK> SGN=%03d SN=%05d EBN=%03d CS=%03d VP=%03d JC=%03d CF=%d\n" % (
            block.sgn, block.sn, block.ebn, block.cs, block.vp, block.jc, block.cf))
        if block.sgn != 0:
            warn.write(u"(%d) SGN: Non-zero subtitle group number!\n" % block.sn)
        if block.ebn != 255:
            warn.write(u"(%d) EBN: Extension block!\n" % block.sn)
        if block.cs != 0:
            warn.write(u"(%d) CS: Non-zero cumulative status!\n" % block.sn)
        if block.cf != 0:
            warn.write(u"(%d) CF: Comment flag set!\n" % block.sn)
        outf.write(u" TCI=%02d:%02d:%02d:%02d TCO=%02d:%02d:%02d:%02d\n" % (
            block.tci_h, block.tci_m, block.tci_s, block.tci_f,
            block.tco_h, block.tco_m, block.tco_s, block.tco_f))
        outf.write(subcodec(block.tf)[0])
        outf.write(u'\n\n')


def main(argv=None):
    """Dump the STL file named by argv[1] as UTF-8 text.

    The dump goes to the file named by argv[2] when given, otherwise to
    standard output. Notes about unusual blocks always go to standard
    output. Returns a process exit status: 0 on success, 1 if the input is
    too short to hold a header, 2 on a usage error. Errors opening either
    file propagate as exceptions instead of being silently ignored.
    """
    import io  # local import: only the output file handling needs it

    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "stldump"
        sys.stderr.write("usage: %s INPUT.stl [OUTPUT.txt]\n" % prog)
        return 2

    # Emit UTF-8 on stdout regardless of locale. Python 3 exposes the byte
    # stream as sys.stdout.buffer; on Python 2 sys.stdout itself takes bytes.
    console = codecs.getwriter('utf8')(getattr(sys.stdout, 'buffer', sys.stdout), 'strict')

    with open(argv[1], 'rb') as inf:
        rawheader = inf.read(stlheaderparser.size)
        if len(rawheader) < stlheaderparser.size:
            sys.stderr.write("%s: too short to contain an STL header\n" % argv[1])
            return 1
        header = StlHeader._make(stlheaderparser.unpack(rawheader))
        # Hand the codec pickers text so they behave the same on Python 2 and 3.
        headercodec = pick_header_codec(header.cpn.decode('ascii', 'replace'))
        subcodec = pick_sub_codec(header.cct.decode('ascii', 'replace'))

        if len(argv) > 2:
            outf = io.open(argv[2], 'w', encoding='utf8')
        else:
            outf = console
        try:
            _dump_header(outf, header, headercodec)
            _dump_blocks(inf, outf, console, subcodec)
        finally:
            # Close only what was opened here; stdout is merely flushed.
            if outf is not console:
                outf.close()
            console.flush()
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment