Skip to content

Instantly share code, notes, and snippets.

@mbrezu
Created January 1, 2011 16:43
Show Gist options
  • Save mbrezu/761839 to your computer and use it in GitHub Desktop.
Save mbrezu/761839 to your computer and use it in GitHub Desktop.
A small script to fix SRT times.
# Tries to fix an srt file so that the subtitle times match the actual dialog times.
# Inputs:
# - the srt file
# - a file with one "srt time --> spoken dialog time" pair per line (both
# written as HH:MM:SS,mmm) that will be used to infer the parameters
# for the srt time adjustment
# Outputs:
# - the corrected srt file to standard output
# - the correction parameters a, b (new time = a * old time + b) to standard error
import sys
import datetime
import re
import math
class SrtEntry(object):
    """One subtitle cue of an SRT file.

    Attributes:
        number: sequence number of the cue (formatted with %d).
        startTime: moment the cue appears, in seconds.
        endTime: moment the cue disappears, in seconds.
        text: the cue text; several lines are joined with newlines.
    """

    def __init__(self, number, startTime, endTime, text):
        self.number = number
        self.startTime = startTime
        self.endTime = endTime
        self.text = text

    def __repr__(self):
        return "SrtEntry(%d, %lf, %lf, %s)" % (self.number,
                                               self.startTime,
                                               self.endTime,
                                               repr(self.text))

    def __str__(self):
        """Render the cue as an SRT block: number, timing line, text."""
        def breakTime(t):
            # Convert to whole milliseconds first and round: truncating the
            # float fraction turns e.g. 1.001 s into ",000" because
            # (1.001 - 1) * 1000 is slightly below 1.
            # SRT cannot express negative timestamps (which a linear
            # correction may produce), so they are clamped to zero.
            totalMs = max(0, int(round(t * 1000)))
            totalSeconds, ms = divmod(totalMs, 1000)
            totalMinutes, s = divmod(totalSeconds, 60)
            h, m = divmod(totalMinutes, 60)
            return [h, m, s, ms]

        args = ([self.number]
                + breakTime(self.startTime)
                + breakTime(self.endTime)
                + [self.text])
        return "%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\n%s\n" % tuple(args)
def parseSubtitle(lines):
    """Build an SrtEntry from the raw lines of one subtitle block.

    lines[0] holds the cue number, lines[1] the "start --> end" timing line
    and every following line is cue text (stripped and joined with "\\n").

    Raises ValueError when the block is too short, the cue number is not an
    integer, or the timing line does not look like
    "H:MM:SS,fff --> H:MM:SS,fff".
    """
    def toSeconds(hours, minutes, seconds, fraction):
        # The part after the comma is a decimal fraction of a second and is
        # not always exactly three digits, so scale by its digit count.
        return (int(hours) * 3600
                + int(minutes) * 60
                + int(seconds)
                + int(fraction) / 10.0 ** len(fraction))

    if len(lines) < 2:
        raise ValueError("Incomplete subtitle block: %r" % (lines,))
    number = int(lines[0])
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    pattern = r'(\d+):(\d\d):(\d\d),(\d+) --> (\d+):(\d\d):(\d\d),(\d+)'
    match = re.search(pattern, lines[1])
    if match is None:
        raise ValueError("Failed to match SRT timing line: %r" % (lines[1],))
    fields = match.groups()
    text = "\n".join([line.strip() for line in lines[2:]])
    return SrtEntry(number,
                    toSeconds(*fields[:4]),
                    toSeconds(*fields[4:]),
                    text)
def parseFile(fileName):
    """Read an SRT file and return its cues as a list of SrtEntry objects.

    Cues are the runs of non-blank lines; one or more blank lines separate
    them. Each run is handed to parseSubtitle.
    """
    # open() exists on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even when reading fails.
    with open(fileName) as f:
        content = f.readlines()

    subtitles = []
    currentSubtitle = []
    for line in content:
        if line.strip():
            currentSubtitle.append(line)
        elif currentSubtitle:
            # A blank line ends the cue collected so far.
            subtitles.append(currentSubtitle)
            currentSubtitle = []
    # The last cue is not necessarily followed by a blank line.
    if currentSubtitle:
        subtitles.append(currentSubtitle)
    return [parseSubtitle(lines) for lines in subtitles]
def parseOneCorrection(line):
    """Parse one "srt time --> spoken time" line of the corrections file.

    Both times are written as H:MM:SS,fff. Returns the pair
    (srtTime, correctTime), each expressed in seconds as a float.

    Raises ValueError when the line does not contain such a pair.
    """
    def toSeconds(hours, minutes, seconds, fraction):
        # The part after the comma is a decimal fraction of a second; scale
        # by its digit count so "5" means 0.5 s and "500" means 0.5 s too.
        return (int(hours) * 3600
                + int(minutes) * 60
                + int(seconds)
                + int(fraction) / 10.0 ** len(fraction))

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    # Hours accept any number of digits, the same as the SRT timing lines.
    pattern = r'(\d+):(\d\d):(\d\d),(\d+) --> (\d+):(\d\d):(\d\d),(\d+)'
    match = re.search(pattern, line)
    if match is None:
        raise ValueError("Failed to match correction line: %r" % (line,))
    fields = match.groups()
    srtTime = toSeconds(*fields[:4])
    correctTime = toSeconds(*fields[4:])
    return (srtTime, correctTime)
def parseCorrections(fileName):
    """Read the corrections file and return a list of (srtTime, correctTime).

    Blank lines are skipped; every other line is parsed with
    parseOneCorrection.
    """
    # open() exists on both Python 2 and 3 (file() is Python 2 only), and the
    # with-block closes the handle even when reading fails.
    with open(fileName) as f:
        content = f.readlines()
    return [parseOneCorrection(line) for line in content if line.strip() != ""]
def leastSquares(corrections):
    """Fit y = a * x + b to the (x, y) pairs by ordinary least squares.

    corrections is a sequence of (srt time, spoken time) pairs. Returns the
    tuple (a, b).

    When the pairs cannot determine a slope (a single pair, or all x values
    identical) the fit falls back to a pure shift: a = 1 and b is the mean
    difference y - x.

    Raises ValueError when corrections is empty.
    """
    n = len(corrections)
    if n == 0:
        raise ValueError("at least one (srt time, spoken time) pair is required")

    # Float accumulators keep the divisions below true divisions even when
    # the pairs are integers under Python 2.
    sum_x = 0.0
    sum_y = 0.0
    sum_xx = 0.0
    sum_xy = 0.0
    for (x, y) in corrections:
        sum_x += x
        sum_y += y
        sum_xx += x * x
        sum_xy += x * y

    denominator = n * sum_xx - sum_x * sum_x
    if denominator == 0:
        # Slope is undetermined; only an offset can be inferred.
        return (1.0, (sum_y - sum_x) / n)
    b = (-sum_x * sum_xy + sum_xx * sum_y) / denominator
    a = (-sum_x * sum_y + n * sum_xy) / denominator
    return (a, b)
def processSub(sub, a, b):
    """Return a new SrtEntry whose times are sub's times mapped through a * t + b.

    The cue number and text are carried over unchanged; sub itself is not
    modified.
    """
    correctedStart = sub.startTime * a + b
    correctedEnd = sub.endTime * a + b
    return SrtEntry(sub.number, correctedStart, correctedEnd, sub.text)
if __name__ == "__main__":
    # Usage: script SRT_FILE CORRECTIONS_FILE
    # Writes the corrected subtitles to stdout and the fitted parameters
    # "a, b" (new time = a * old time + b) to stderr.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s SRT_FILE CORRECTIONS_FILE\n" % sys.argv[0])
        sys.exit(1)
    subs = parseFile(sys.argv[1])
    corrections = parseCorrections(sys.argv[2])
    a, b = leastSquares(corrections)
    sys.stderr.write("%lf, %lf\n" % (a, b))
    fixedSubs = [processSub(sub, a, b) for sub in subs]
    for sub in fixedSubs:
        # Same output as the Python 2 statement "print sub", but valid
        # syntax on Python 3 as well.
        sys.stdout.write(str(sub) + "\n")
@stasinos
Copy link

stasinos commented Oct 31, 2017

Thanks for the useful tool.

A minor fix: srt files give sub-seconds as a decimal value, not necessarily thousandths. This patch worked for me:
--- SRTfix.py.orig 2017-10-31 00:28:23.468220190 +0200
+++ SRTfix.py 2017-10-31 05:54:43.179873302 +0200
@@ -38,16 +38,26 @@

def parseSubtitle(lines):
result = SrtEntry(int(lines[0]), 0, 0, "\n".join([line.strip() for line in lines[2:]]))
- pattern = '(\d+):(\d\d):(\d\d),(\d\d\d) --> (\d+):(\d\d):(\d\d),(\d\d\d)'
+ pattern = '(\d+):(\d\d):(\d\d),(\d+) --> (\d+):(\d\d):(\d\d),(\d+)'
match = re.search(pattern, lines[1])
- result.startTime = int(match.group(1)) * 3600 \
+ try:
+ result.startTime = int(match.group(1)) * 3600 \
+ int(match.group(2)) * 60 \
- + int(match.group(3)) \
- + int(match.group(4)) / 1000.0
- result.endTime = int(match.group(5)) * 3600 \
+ + int(match.group(3))
+ x = int(match.group(4))
+ for i in range( 0, len(match.group(4)) ):
+ x = x / 10
+ result.startTime += x
+ result.endTime = int(match.group(5)) * 3600 \
+ int(match.group(6)) * 60 \
- + int(match.group(7)) \
- + int(match.group(8)) / 1000.0
+ + int(match.group(7))
+ x = int(match.group(8))
+ for i in range( 0, len(match.group(8)) ):
+ x = x / 10
+ result.endTime += x
+ except AttributeError as ex:
+ print( 'Failed tro match ' + lines[1] )
+ raise ex

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment