# nielsmh/readstl.py

## readstl.py
from struct import Struct
import sys
from collections import namedtuple
import codecs

def pick_header_codec(cpn):
  """Return the decoder function registered for the header code page.

  cpn is handed unchanged to codecs.getdecoder, so it has to be a codec
  name or alias known to the codec registry.  The returned callable takes
  an encoded string and gives back a (decoded_text, consumed_length) tuple.

  Raises LookupError when no codec is registered under cpn.
  """
  decoder = codecs.getdecoder(cpn)
  return decoder

class iso6937codec:
  """Stand-in decoder for ISO 6937 encoded subtitle text.

  Not a complete implementation, just good enough for basic testing:
  decode() passes 7-bit values through unchanged and shows every other
  value as a visible hex escape instead of translating it.
  """

  def __init__(self):
    # ISO 6937 non-spacing diacritic bytes and the Unicode combining marks
    # they stand for.  Both strings must list the same 13 entries in the
    # same order: zip() silently truncates to the shorter one, so a missing
    # byte (0xcd, double acute) would shift every later pairing.
    # NOTE(review): decode() does not consult this table yet.
    self._table = dict(zip(
      '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xca\xcb\xcd\xce\xcf',
      u'\u0300\u0301\u0302\u0303\u0304\u0306\u0307\u0308\u030a\u0327\u030b\u0328\u030c'
      ))

  def decode(self, str):
    """Decode an encoded string into text.

    Values below 128 become the character with the same code point; any
    other value is rendered as a backslash, the letter x and its value in
    two-digit lowercase hex.

    Returns a (text, consumed) tuple like the decoders handed out by the
    codecs module, where consumed is the number of input items processed.
    """
    pieces = []
    for c in str:
      # Iterating a Python 2 str yields 1-character strings, while a
      # Python 3 bytes object yields ints; normalise to an integer.
      code = c if isinstance(c, int) else ord(c)
      if code < 128:
        pieces.append(u'%c' % code)
      else:
        pieces.append(u'\\x%02x' % code)
    # Report the amount of input consumed, not the length of the output:
    # escaped values are four characters long and would inflate the count.
    return (u''.join(pieces), len(str))

def pick_sub_codec(cct):
  """Build the decoder used for subtitle text fields.

  cct is the two-character character-code-table value from the header:
  '00' selects the local iso6937codec, '01' to '04' select ISO 8859-5 to
  ISO 8859-8, and any other value falls back to ISO 8859-1.

  The returned callable takes the raw text field and returns the selected
  decoder's (text, length) tuple.  Before decoding it rewrites control
  bytes so they stay visible in the dump: 0x80 and 0x81 become '/', 0x82
  and 0x83 become '_', 0x84 becomes '<', 0x85 becomes '>', 0x8a becomes a
  newline, and every 0x8f byte is dropped.
  """
  from string import maketrans

  # Character code tables that map straight onto a registered codec name.
  iso_8859_by_cct = {
    '01': 'iso-8859-5',
    '02': 'iso-8859-6',
    '03': 'iso-8859-7',
    '04': 'iso-8859-8',
  }
  if cct == '00':
    decode = iso6937codec().decode
  else:
    decode = codecs.getdecoder(iso_8859_by_cct.get(cct, 'iso-8859-1'))

  control_map = maketrans('\x80\x81\x82\x83\x84\x85\x8a', '//__<>\n')

  def decode_subtitle(s):
    # The second translate argument lists the bytes to delete outright.
    return decode(s.translate(control_map, '\x8f'))

  return decode_subtitle

# Layout of the header record.  Each format fragment sits next to the field
# names it produces; the 75 pad bytes ("75x") are skipped and get no name.
stlheaderparser = Struct(
  "3s8sc2s2s"
  "32s32s32s32s32s32s"
  "16s6s6s2s"
  "5s5s3s2s2s"
  "c8s8scc3s"
  "32s32s32s"
  "75x576s"
)
StlHeader = namedtuple(
  "StlHeader",
  "cpn dfc dsc cct lc "
  "opt oet tpt tet tn tcd "
  "slr cd rd rn "
  "tnb tns tng mnc mnr "
  "tcs tcp tcf tnd dsn co "
  "pub en ecd "
  "uda"
)

# Layout of one subtitle block: little-endian, with both timecodes stored
# as four single bytes each and the text field as the trailing 112 bytes.
stlblockparser = Struct(
  "<"
  "BHBB"
  "4B"
  "4B"
  "BBB"
  "112s"
)
StlBlock = namedtuple(
  "StlBlock",
  "sgn sn ebn cs "
  "tci_h tci_m tci_s tci_f "
  "tco_h tco_m tco_s tco_f "
  "vp jc cf "
  "tf"
)


# Dump an STL file as readable text.  argv[1] names the input; the dump is
# written UTF-8 encoded to the file named by argv[2], or to standard output
# when that argument is missing or the file cannot be opened.
inf = open(sys.argv[1], 'rb')
try:
  try:
    outf = open(sys.argv[2], 'w')
  except (IndexError, IOError):
    # Only the expected failures select the fallback; anything else
    # (KeyboardInterrupt, programming errors) is allowed to propagate.
    outf = sys.stdout
  outf = codecs.getwriter('utf8')(outf, 'strict')
  try:
    # The header is a single fixed-size record at the start of the file.
    rawheader = inf.read(stlheaderparser.size)
    header = StlHeader._make(stlheaderparser.unpack(rawheader))
    headercodec = pick_header_codec(header.cpn)
    outf.write("Header codepage:      %s\n" % header.cpn)
    outf.write("Disk format code:    '%s'\n" % header.dfc)
    outf.write("Display standard:     %s\n" % header.dsc)
    outf.write("Subtitle encoding:    %s\n" % header.cct)
    outf.write("Subtitle language:    %s\n" % header.lc)
    outf.write("Original pgm. title: '%s'\n" % headercodec(header.opt)[0])
    outf.write("Original eps. title: '%s'\n" % headercodec(header.oet)[0])
    outf.write("Tlated pgm. title:   '%s'\n" % headercodec(header.tpt)[0])
    outf.write("Tlated eps. title:   '%s'\n" % headercodec(header.tet)[0])
    outf.write("Translator name:     '%s'\n" % headercodec(header.tn)[0])
    outf.write("Translator contact:  '%s'\n" % headercodec(header.tcd)[0])
    outf.write("Subt.list ref. code: '%s'\n" % headercodec(header.slr)[0])
    outf.write("Creation date:        %s\n" % header.cd)
    outf.write("Revision date:        %s\n" % header.rd)
    outf.write("Revision number:      %s\n" % header.rn)
    outf.write("Total # blocks:       %s\n" % header.tnb)
    outf.write("Total # subtitles:    %s\n" % header.tns)
    outf.write("Total # sub. groups:  %s\n" % header.tng)
    outf.write("Max # vis. ch./row:   %s\n" % header.mnc)
    outf.write("Max # vis. rows:      %s\n" % header.mnr)
    outf.write("Status TC:            %s\n" % header.tcs)
    outf.write("Start-of-pgm TC:      %s\n" % header.tcp)
    outf.write("First in-cue TC:      %s\n" % header.tcf)
    outf.write("Disk #/total:         %s/%s\n" % (header.dsn, header.tnd))
    outf.write("Country of origin:    %s\n" % header.co)
    outf.write("Publisher:           '%s'\n" % headercodec(header.pub)[0])
    outf.write("Editor's name:       '%s'\n" % headercodec(header.en)[0])
    outf.write("Editor contact:      '%s'\n" % headercodec(header.ecd)[0])
    outf.write("User-defined area:\n")
    # The user-defined area is opaque data.  Decode it explicitly, replacing
    # anything undecodable, instead of handing raw bytes to the UTF-8
    # writer: that would try an implicit ASCII decode first and abort the
    # dump on the first non-ASCII byte.
    outf.write(headercodec(header.uda, 'replace')[0])
    outf.write('\n\n')

    subcodec = pick_sub_codec(header.cct)

    # Subtitle blocks follow back to back until the file runs out; a
    # trailing partial block is ignored.
    while not inf.closed:
      rawblock = inf.read(stlblockparser.size)
      if len(rawblock) < stlblockparser.size:
        break
      block = StlBlock._make(stlblockparser.unpack(rawblock))
      outf.write("BLOCK>  SGN=%03d  SN=%05d  EBN=%03d  CS=%03d  VP=%03d  JC=%03d  CF=%d\n" % (
        block.sgn, block.sn, block.ebn, block.cs, block.vp, block.jc, block.cf))
      # Diagnostics go to standard output via print, independent of where
      # the dump itself is written.
      if block.sgn != 0:
        print("(%d) SGN: Non-zero subtitle group number!" % block.sn)
      if block.ebn != 255:
        print("(%d) EBN: Extension block!" % block.sn)
      if block.cs != 0:
        print("(%d) CS: Non-zero cumulative status!" % block.sn)
      if block.cf != 0:
        print("(%d) CF: Comment flag set!" % block.sn)
      outf.write("        TCI=%02d:%02d:%02d:%02d  TCO=%02d:%02d:%02d:%02d\n" % (
        block.tci_h, block.tci_m, block.tci_s, block.tci_f, block.tco_h, block.tco_m, block.tco_s, block.tco_f))
      outf.write(subcodec(block.tf)[0])
      outf.write('\n\n')
  finally:
    # Flush and close the dump even when parsing fails part-way through.
    outf.close()
finally:
  inf.close()
	from struct import Struct
	import sys
	from collections import namedtuple
	import codecs

	def pick_header_codec(cpn):
	  """Return the decoder function registered for the header code page.

	  cpn is handed unchanged to codecs.getdecoder, so it has to be a codec
	  name or alias known to the codec registry.  The returned callable takes
	  an encoded string and gives back a (decoded_text, consumed_length) tuple.

	  Raises LookupError when no codec is registered under cpn.
	  """
	  return codecs.getdecoder(cpn)

	class iso6937codec:
	  """Stand-in decoder for ISO 6937 encoded subtitle text.

	  Not a complete implementation, just good enough for basic testing:
	  decode() passes 7-bit values through unchanged and shows every other
	  value as a visible hex escape instead of translating it.
	  """

	  def __init__(self):
	    # ISO 6937 non-spacing diacritic bytes and the Unicode combining marks
	    # they stand for.  Both strings must list the same 13 entries in the
	    # same order: zip() silently truncates to the shorter one, so a missing
	    # byte (0xcd, double acute) would shift every later pairing.
	    # NOTE(review): decode() does not consult this table yet.
	    self._table = dict(zip(
	      '\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xca\xcb\xcd\xce\xcf',
	      u'\u0300\u0301\u0302\u0303\u0304\u0306\u0307\u0308\u030a\u0327\u030b\u0328\u030c'
	      ))

	  def decode(self, str):
	    """Decode an encoded string into text.

	    Values below 128 become the character with the same code point; any
	    other value is rendered as a backslash, the letter x and its value in
	    two-digit lowercase hex.

	    Returns a (text, consumed) tuple like the decoders handed out by the
	    codecs module, where consumed is the number of input items processed.
	    """
	    pieces = []
	    for c in str:
	      # Iterating a Python 2 str yields 1-character strings, while a
	      # Python 3 bytes object yields ints; normalise to an integer.
	      code = c if isinstance(c, int) else ord(c)
	      if code < 128:
	        pieces.append(u'%c' % code)
	      else:
	        pieces.append(u'\\x%02x' % code)
	    # Report the amount of input consumed, not the length of the output:
	    # escaped values are four characters long and would inflate the count.
	    return (u''.join(pieces), len(str))

	def pick_sub_codec(cct):
	  """Build the decoder used for subtitle text fields.

	  cct is the two-character character-code-table value from the header:
	  '00' selects the local iso6937codec, '01' to '04' select ISO 8859-5 to
	  ISO 8859-8, and any other value falls back to ISO 8859-1.

	  The returned callable takes the raw text field and returns the selected
	  decoder's (text, length) tuple.  Before decoding it rewrites control
	  bytes so they stay visible in the dump: 0x80 and 0x81 become '/', 0x82
	  and 0x83 become '_', 0x84 becomes '<', 0x85 becomes '>', 0x8a becomes a
	  newline, and every 0x8f byte is dropped.
	  """
	  from string import maketrans
	  if cct == '00':
	    codec = iso6937codec().decode
	  elif cct == '01':
	    codec = codecs.getdecoder('iso-8859-5')
	  elif cct == '02':
	    codec = codecs.getdecoder('iso-8859-6')
	  elif cct == '03':
	    codec = codecs.getdecoder('iso-8859-7')
	  elif cct == '04':
	    codec = codecs.getdecoder('iso-8859-8')
	  else:
	    codec = codecs.getdecoder('iso-8859-1')
	  pretrans = maketrans('\x80\x81\x82\x83\x84\x85\x8a', '//__<>\n')
	  # The second translate argument lists the bytes to delete outright.
	  return lambda s: codec(s.translate(pretrans, '\x8f'))

	# Layout of the header record.  Each format fragment sits next to the field
	# names it produces; the 75 pad bytes ("75x") are skipped and get no name.
	stlheaderparser = Struct(
	  "3s8sc2s2s"
	  "32s32s32s32s32s32s"
	  "16s6s6s2s"
	  "5s5s3s2s2s"
	  "c8s8scc3s"
	  "32s32s32s"
	  "75x576s"
	)
	StlHeader = namedtuple(
	  "StlHeader",
	  "cpn dfc dsc cct lc "
	  "opt oet tpt tet tn tcd "
	  "slr cd rd rn "
	  "tnb tns tng mnc mnr "
	  "tcs tcp tcf tnd dsn co "
	  "pub en ecd "
	  "uda"
	)

	# Layout of one subtitle block: little-endian, with both timecodes stored
	# as four single bytes each and the text field as the trailing 112 bytes.
	stlblockparser = Struct(
	  "<"
	  "BHBB"
	  "4B"
	  "4B"
	  "BBB"
	  "112s"
	)
	StlBlock = namedtuple(
	  "StlBlock",
	  "sgn sn ebn cs "
	  "tci_h tci_m tci_s tci_f "
	  "tco_h tco_m tco_s tco_f "
	  "vp jc cf "
	  "tf"
	)


	# Dump an STL file as readable text.  argv[1] names the input; the dump is
	# written UTF-8 encoded to the file named by argv[2], or to standard output
	# when that argument is missing or the file cannot be opened.
	inf = open(sys.argv[1], 'rb')
	try:
	  try:
	    outf = open(sys.argv[2], 'w')
	  except (IndexError, IOError):
	    # Only the expected failures select the fallback; anything else
	    # (KeyboardInterrupt, programming errors) is allowed to propagate.
	    outf = sys.stdout
	  outf = codecs.getwriter('utf8')(outf, 'strict')
	  try:
	    # The header is a single fixed-size record at the start of the file.
	    rawheader = inf.read(stlheaderparser.size)
	    header = StlHeader._make(stlheaderparser.unpack(rawheader))
	    headercodec = pick_header_codec(header.cpn)
	    outf.write("Header codepage: %s\n" % header.cpn)
	    outf.write("Disk format code: '%s'\n" % header.dfc)
	    outf.write("Display standard: %s\n" % header.dsc)
	    outf.write("Subtitle encoding: %s\n" % header.cct)
	    outf.write("Subtitle language: %s\n" % header.lc)
	    outf.write("Original pgm. title: '%s'\n" % headercodec(header.opt)[0])
	    outf.write("Original eps. title: '%s'\n" % headercodec(header.oet)[0])
	    outf.write("Tlated pgm. title: '%s'\n" % headercodec(header.tpt)[0])
	    outf.write("Tlated eps. title: '%s'\n" % headercodec(header.tet)[0])
	    outf.write("Translator name: '%s'\n" % headercodec(header.tn)[0])
	    outf.write("Translator contact: '%s'\n" % headercodec(header.tcd)[0])
	    outf.write("Subt.list ref. code: '%s'\n" % headercodec(header.slr)[0])
	    outf.write("Creation date: %s\n" % header.cd)
	    outf.write("Revision date: %s\n" % header.rd)
	    outf.write("Revision number: %s\n" % header.rn)
	    outf.write("Total # blocks: %s\n" % header.tnb)
	    outf.write("Total # subtitles: %s\n" % header.tns)
	    outf.write("Total # sub. groups: %s\n" % header.tng)
	    outf.write("Max # vis. ch./row: %s\n" % header.mnc)
	    outf.write("Max # vis. rows: %s\n" % header.mnr)
	    outf.write("Status TC: %s\n" % header.tcs)
	    outf.write("Start-of-pgm TC: %s\n" % header.tcp)
	    outf.write("First in-cue TC: %s\n" % header.tcf)
	    outf.write("Disk #/total: %s/%s\n" % (header.dsn, header.tnd))
	    outf.write("Country of origin: %s\n" % header.co)
	    outf.write("Publisher: '%s'\n" % headercodec(header.pub)[0])
	    outf.write("Editor's name: '%s'\n" % headercodec(header.en)[0])
	    outf.write("Editor contact: '%s'\n" % headercodec(header.ecd)[0])
	    outf.write("User-defined area:\n")
	    # The user-defined area is opaque data.  Decode it explicitly, replacing
	    # anything undecodable, instead of handing raw bytes to the UTF-8
	    # writer: that would try an implicit ASCII decode first and abort the
	    # dump on the first non-ASCII byte.
	    outf.write(headercodec(header.uda, 'replace')[0])
	    outf.write('\n\n')

	    subcodec = pick_sub_codec(header.cct)

	    # Subtitle blocks follow back to back until the file runs out; a
	    # trailing partial block is ignored.
	    while not inf.closed:
	      rawblock = inf.read(stlblockparser.size)
	      if len(rawblock) < stlblockparser.size:
	        break
	      block = StlBlock._make(stlblockparser.unpack(rawblock))
	      outf.write("BLOCK> SGN=%03d SN=%05d EBN=%03d CS=%03d VP=%03d JC=%03d CF=%d\n" % (
	        block.sgn, block.sn, block.ebn, block.cs, block.vp, block.jc, block.cf))
	      # Diagnostics go to standard output via print, independent of where
	      # the dump itself is written.
	      if block.sgn != 0:
	        print("(%d) SGN: Non-zero subtitle group number!" % block.sn)
	      if block.ebn != 255:
	        print("(%d) EBN: Extension block!" % block.sn)
	      if block.cs != 0:
	        print("(%d) CS: Non-zero cumulative status!" % block.sn)
	      if block.cf != 0:
	        print("(%d) CF: Comment flag set!" % block.sn)
	      outf.write(" TCI=%02d:%02d:%02d:%02d TCO=%02d:%02d:%02d:%02d\n" % (
	        block.tci_h, block.tci_m, block.tci_s, block.tci_f, block.tco_h, block.tco_m, block.tco_s, block.tco_f))
	      outf.write(subcodec(block.tf)[0])
	      outf.write('\n\n')
	  finally:
	    # Flush and close the dump even when parsing fails part-way through.
	    outf.close()
	finally:
	  inf.close()