Skip to content

Instantly share code, notes, and snippets.

@PinkPandaKatie
Created October 5, 2016 23:25
Show Gist options
  • Save PinkPandaKatie/123fc7c785a38037f827762f991c5d8a to your computer and use it in GitHub Desktop.
Save PinkPandaKatie/123fc7c785a38037f827762f991c5d8a to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import sys
import re
import argparse
# A line made up solely of digits: the sequence number that opens an SRT cue.
rxnum = re.compile(r'^(\d+)$')
# Timing line "HH:MM:SS,mmm --> HH:MM:SS,mmm". The eight groups are the begin
# and end hours, minutes, seconds and fractional-second digits, in that order.
rxtime = re.compile(r'(\d+):(\d+):(\d+),(\d+) --> (\d+):(\d+):(\d+),(\d+)')
# SDH caption: an optional leading dash followed by parentheses that contain
# only upper-case letters, whitespace, digits and hyphens, e.g. "(LAUGHS)".
rxcap = re.compile(r'-?\([A-Z\s0-9-]+\)')
def main():
    """Command-line entry point: read an SRT file, filter it, print it.

    Parses the command line, loads the subtitles from the ``source`` path
    (decoded as UTF-8), optionally strips SDH captions (``-c``), then
    optionally delays every subtitle by its own duration (``-d``), and
    finally writes the resulting SRT to standard output.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('source')
    parser.add_argument(
        '-d', '--delay',
        action='store_true',
        help='Delay each subtitle by its duration',
    )
    parser.add_argument(
        '-c', '--removecap',
        action='store_true',
        help='Try to remove SDH captions',
    )
    options = parser.parse_args()

    with open(options.source, 'r', encoding='utf-8') as handle:
        subs = readsrt(handle)

    # Order matters: captions are removed before the cues are delayed.
    stages = (
        (options.removecap, removecap),
        (options.delay, delaysubs),
    )
    for enabled, transform in stages:
        if enabled:
            subs = transform(subs)

    writesrt(sys.stdout, subs)
def delaysubs(src, maxdur=3):
    """Shift every subtitle so that it starts when it originally ended.

    Each cue ``(begin, end, text)`` is moved to begin at its original ``end``
    and stays up for its original duration, capped at ``maxdur``. The new end
    is also clipped to the original end of the following cue, which is where
    that cue begins once it has been shifted too, so delayed cues do not
    overlap.

    Args:
        src: Sequence of ``(begin, end, text)`` tuples with numeric times.
        maxdur: Upper bound on how long a delayed cue is shown, in the same
            unit as the cue times. Defaults to 3, the value that used to be
            hard-coded.

    Returns:
        A new list of ``(begin, end, text)`` tuples. ``src`` itself is not
        modified; the ``text`` objects are reused, not copied.
    """
    out = []
    last = len(src) - 1
    for i, (begin, end, text) in enumerate(src):
        new_begin = end
        new_end = new_begin + min(end - begin, maxdur)
        if i < last:
            # The next cue is delayed as well, so it will start at its own
            # original end time; do not run past that point.
            new_end = min(new_end, src[i + 1][1])
        out.append((new_begin, new_end, text))
    return out
def removecap(src):
    """Strip SDH captions such as ``(LAUGHS)`` from every subtitle.

    The lines of each cue are joined so that a caption spanning several
    lines can be matched by ``rxcap``, the matches are deleted, and the
    text is split back into lines. Cues with no visible text left are
    dropped entirely.

    Lines that end up empty are removed from the cue body: a blank line
    terminates a cue in the SRT format, so leaving one where a caption used
    to be would cut the cue short and orphan the dialogue after it. Blank
    lines that trailed the original text are kept, because ``writesrt``
    emits the text lines verbatim and those blanks are what separates the
    cues in the output.

    Args:
        src: Iterable of ``(begin, end, text)`` tuples, ``text`` being a
            sequence of line strings.

    Returns:
        A new list of ``(begin, end, text)`` tuples with captions removed.
    """
    out = []
    for begin, end, text in src:
        lines = list(text)

        # Remember how many blank separator lines closed the original cue.
        trailing = 0
        for line in reversed(lines):
            if line.strip():
                break
            trailing += 1

        cleaned = rxcap.sub('', '\n'.join(lines))
        # Strip each surviving line so no stray whitespace is left behind
        # next to a removed caption.
        kept = [line.strip() for line in cleaned.split('\n') if line.strip()]
        if kept:
            out.append((begin, end, kept + [''] * trailing))
    return out
def srttime(t):
    """Format a time given in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds once, up front, and then
    split with integer arithmetic. Truncating the fractional part instead
    loses a millisecond whenever the float sits just below the intended
    value (for example 1.001 would be written as ``,000``).

    Args:
        t: Time in seconds, as an int or float.

    Returns:
        The formatted timestamp string.
    """
    total_ms = int(round(t * 1000))
    secs, ms = divmod(total_ms, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, ms)
def readsrt(inf):
    """Parse SRT subtitles from an iterable of text lines.

    A timing line (matched by ``rxtime``) starts a new cue; every following
    line, blank lines included, is collected as that cue's text until the
    next cue index is reached. Lines seen before the first timing line are
    ignored.

    A digits-only line counts as a cue index only when the line right after
    it is a timing line. Without that look-ahead, a line of dialogue that
    happens to be a bare number (e.g. "1984") would be taken for an index
    and the rest of that cue would be silently discarded. The flip side is
    that a stray index with no timing line after it is kept as text of the
    preceding cue.

    Args:
        inf: Iterable yielding the lines of an SRT file, e.g. an open text
            file.

    Returns:
        A list of ``(begin, end, text)`` tuples where ``begin`` and ``end``
        are times in seconds (float) and ``text`` is the list of
        whitespace-stripped lines belonging to the cue.
    """
    lines = [raw.strip() for raw in inf]
    count = len(lines)
    out = []
    cursub = None
    for pos, line in enumerate(lines):
        if rxnum.match(line):
            upcoming = lines[pos + 1] if pos + 1 < count else ''
            if rxtime.match(upcoming):
                # Genuine cue index: close the current cue and skip the line.
                cursub = None
                continue
        m = rxtime.match(line)
        if m:
            bh, bm, bs, bf, eh, em, es, ef = m.groups()
            begin = int(bh) * 3600 + int(bm) * 60 + float(bs + '.' + bf)
            end = int(eh) * 3600 + int(em) * 60 + float(es + '.' + ef)
            # The list is stored in the tuple and filled in by later lines.
            cursub = []
            out.append((begin, end, cursub))
            continue
        if cursub is not None:
            cursub.append(line)
    return out
def writesrt(outf, subs):
    """Write subtitles to ``outf`` in SRT layout.

    Cues are numbered from 1 in iteration order. For each cue the number,
    the ``begin --> end`` timing line and then every text line are written,
    each terminated by a newline. No extra separator is emitted: any blank
    line between cues comes from empty strings in the cue's text.

    Args:
        outf: Writable text stream.
        subs: Iterable of ``(begin, end, text)`` tuples; the times are
            formatted with ``srttime`` and ``text`` is an iterable of lines.
    """
    for number, (start, stop, lines) in enumerate(subs, start=1):
        outf.write('%d\n' % number)
        timing = (srttime(start), srttime(stop))
        outf.write('%s --> %s\n' % timing)
        outf.writelines('%s\n' % line for line in lines)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment