Created
January 1, 2011 16:43
-
-
Save mbrezu/761839 to your computer and use it in GitHub Desktop.
A small script to fix SRT times.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fixes an SRT file so that subtitle times match the actual spoken dialog times.
# Inputs:
# - the srt file
# - a file with one (srt time, spoken dialog time) pair per line, written as
#   "HH:MM:SS,mmm --> HH:MM:SS,mmm", used to infer the parameters of the
#   srt time adjustment
# Outputs:
# - the corrected srt file on standard output
# - the correction parameters on standard error
import sys | |
import datetime | |
import re | |
import math | |
class SrtEntry(object):
    """One subtitle cue of an SRT file.

    Attributes (all set directly from the constructor arguments):
      number    -- the cue's sequence number (formatted with %d)
      startTime -- cue start, in seconds (float)
      endTime   -- cue end, in seconds (float)
      text      -- the cue text; may contain embedded newlines
    """

    def __init__(self, number, startTime, endTime, text):
        self.number = number
        self.startTime = startTime
        self.endTime = endTime
        self.text = text

    def __repr__(self):
        return "SrtEntry(%d, %f, %f, %r)" % (self.number,
                                             self.startTime,
                                             self.endTime,
                                             self.text)

    @staticmethod
    def _breakTime(t):
        """Split a time in seconds into [hours, minutes, seconds, millis].

        The time is rounded ONCE to whole milliseconds before splitting.
        Truncating the fractional part instead (int((t - floor(t)) * 1000))
        loses a millisecond whenever the float sits just below the intended
        value, e.g. 1.001 would come out as ",000".  Rounding first also
        makes carries propagate correctly (1.9996 s -> 00:00:02,000).

        SRT has no notation for negative timestamps, so times that a
        correction pushed below zero are clamped to 0.
        """
        total_ms = max(0, int(round(t * 1000)))
        total_s, ms = divmod(total_ms, 1000)
        total_m, s = divmod(total_s, 60)
        h, m = divmod(total_m, 60)
        return [h, m, s, ms]

    def __str__(self):
        """Return the cue in SRT syntax: number, timing line, text, newline."""
        args = ([self.number]
                + self._breakTime(self.startTime)
                + self._breakTime(self.endTime)
                + [self.text])
        return ("%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\n%s\n"
                % tuple(args))
def parseSubtitle(lines):
    """Build an SrtEntry from the raw lines of one subtitle cue.

    lines[0] must hold the cue number and lines[1] the timing line
    ("H:MM:SS,fff --> H:MM:SS,fff"); any remaining lines are the cue text,
    which is stripped line by line and re-joined with newlines.

    The sub-second part is read as a decimal fraction of any length
    (",5" is half a second, ",500" likewise), and either "," or "." is
    accepted as the decimal separator.

    Raises ValueError if the cue has fewer than two lines, if the cue
    number is not an integer, or if the timing line cannot be parsed.
    """
    if len(lines) < 2:
        raise ValueError("Incomplete subtitle entry: %r" % (lines,))

    # Raw string: "\d" in a normal string literal is an invalid escape.
    pattern = (r'(\d+):(\d\d):(\d\d)[,.](\d+)'
               r' --> '
               r'(\d+):(\d\d):(\d\d)[,.](\d+)')
    match = re.search(pattern, lines[1])
    if match is None:
        raise ValueError("Cannot parse subtitle timing line: %r" % lines[1])

    def toSeconds(hours, minutes, seconds, fraction):
        # The fraction digits are scaled by their own length, so "5", "50"
        # and "500" all mean 0.5 s.
        return (int(hours) * 3600
                + int(minutes) * 60
                + int(seconds)
                + int(fraction) / float(10 ** len(fraction)))

    groups = match.groups()
    text = "\n".join([line.strip() for line in lines[2:]])
    return SrtEntry(int(lines[0]),
                    toSeconds(*groups[:4]),
                    toSeconds(*groups[4:]),
                    text)
def parseFile(fileName):
    """Read an SRT file and return its cues as a list of SrtEntry objects.

    The file is split into cues at blank (whitespace-only) lines; runs of
    several blank lines are treated as one separator.  Each group of
    non-blank lines is handed to parseSubtitle.
    """
    subtitles = []
    currentSubtitle = []
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the handle even if reading fails part-way.
    with open(fileName) as f:
        for line in f:
            if line.strip():
                currentSubtitle.append(line)
            elif currentSubtitle:
                subtitles.append(currentSubtitle)
                currentSubtitle = []
    # The last cue is not necessarily followed by a blank line.
    if currentSubtitle:
        subtitles.append(currentSubtitle)
    return [parseSubtitle(lines) for lines in subtitles]
def parseOneCorrection(line):
    """Parse one correction line into a (srtTime, correctTime) tuple.

    The line must contain "H:MM:SS,fff --> H:MM:SS,fff": the time shown in
    the srt file on the left, the time the dialog is actually spoken on the
    right.  Both values are returned in seconds as floats.

    The sub-second part is read as a decimal fraction of any length, and
    either "," or "." is accepted as the decimal separator.

    Raises ValueError if the line does not contain such a pair.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape.
    pattern = (r'(\d+):(\d\d):(\d\d)[,.](\d+)'
               r' --> '
               r'(\d+):(\d\d):(\d\d)[,.](\d+)')
    match = re.search(pattern, line)
    if match is None:
        raise ValueError("Cannot parse correction line: %r" % line)

    def toSeconds(hours, minutes, seconds, fraction):
        # The fraction digits are scaled by their own length, so "5", "50"
        # and "500" all mean 0.5 s.
        return (int(hours) * 3600
                + int(minutes) * 60
                + int(seconds)
                + int(fraction) / float(10 ** len(fraction)))

    groups = match.groups()
    return (toSeconds(*groups[:4]), toSeconds(*groups[4:]))
def parseCorrections(fileName):
    """Read the corrections file and return a list of (srtTime, correctTime).

    Every non-blank line of the file is parsed with parseOneCorrection;
    blank (whitespace-only) lines are skipped.
    """
    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the handle even if parsing a line raises.
    with open(fileName) as f:
        return [parseOneCorrection(line) for line in f if line.strip()]
def leastSquares(corrections):
    """Fit y = a * x + b to the (x, y) pairs by ordinary least squares.

    corrections is a sequence of (x, y) number pairs.  Returns the tuple
    (a, b) as floats.

    When the slope cannot be determined (a single pair, or every pair has
    the same x) the fit falls back to a pure shift: a = 1 and
    b = mean(y) - mean(x).  The textbook formula would divide by zero here.

    Raises ValueError if corrections is empty.
    """
    n = len(corrections)
    if n == 0:
        raise ValueError("leastSquares needs at least one (x, y) pair")

    # float(n) keeps the division a true division under Python 2 even when
    # the caller passes integer coordinates.
    mean_x = sum(x for x, _ in corrections) / float(n)
    mean_y = sum(y for _, y in corrections) / float(n)

    # Centered sums are algebraically the same as the normal-equation form
    # (n * sum_xx - sum_x ** 2, ...) but avoid subtracting two large,
    # nearly equal numbers.
    s_xx = sum((x - mean_x) ** 2 for x, _ in corrections)
    s_xy = sum((x - mean_x) * (y - mean_y) for x, y in corrections)

    if s_xx == 0:
        return (1.0, mean_y - mean_x)

    a = s_xy / s_xx
    b = mean_y - a * mean_x
    return (a, b)
def processSub(sub, a, b):
    """Return a new SrtEntry whose times are sub's times mapped by t*a + b.

    The cue number and text are carried over unchanged; sub itself is not
    modified.
    """
    correctedStart = sub.startTime * a + b
    correctedEnd = sub.endTime * a + b
    return SrtEntry(sub.number, correctedStart, correctedEnd, sub.text)
if __name__ == "__main__":
    # Command line: <script> SRT_FILE CORRECTIONS_FILE
    # Fail with a usage message instead of a bare IndexError when an
    # argument is missing.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s SRT_FILE CORRECTIONS_FILE\n" % sys.argv[0])
        sys.exit(2)

    subs = parseFile(sys.argv[1])
    corrections = parseCorrections(sys.argv[2])
    a, b = leastSquares(corrections)
    # The fitted parameters go to stderr so that stdout holds only the
    # corrected srt file.
    sys.stderr.write("%lf, %lf\n" % (a, b))
    fixedSubs = [processSub(sub, a, b) for sub in subs]
    for sub in fixedSubs:
        # Same output as the Python 2 statement "print sub" (str(sub) plus a
        # newline), but valid on Python 3 as well.
        sys.stdout.write(str(sub) + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the useful tool.
A minor fix: srt files give sub-seconds as a decimal value, not necessarily thousandths. This patch worked for me:
--- SRTfix.py.orig 2017-10-31 00:28:23.468220190 +0200
+++ SRTfix.py 2017-10-31 05:54:43.179873302 +0200
@@ -38,16 +38,26 @@
def parseSubtitle(lines):
result = SrtEntry(int(lines[0]), 0, 0, "\n".join([line.strip() for line in lines[2:]]))
- pattern = '(\d+):(\d\d):(\d\d),(\d\d\d) --> (\d+):(\d\d):(\d\d),(\d\d\d)'
+ pattern = '(\d+):(\d\d):(\d\d),(\d+) --> (\d+):(\d\d):(\d\d),(\d+)'
match = re.search(pattern, lines[1])
- result.startTime = int(match.group(1)) * 3600 \
+ try:
+ result.startTime = int(match.group(1)) * 3600 \
+ int(match.group(2)) * 60 \
- + int(match.group(3)) \
- + int(match.group(4)) / 1000.0
- result.endTime = int(match.group(5)) * 3600 \
+ + int(match.group(3))
+ x = int(match.group(4))
+ for i in range( 0, len(match.group(4)) ):
+ x = x / 10
+ result.startTime += x
+ result.endTime = int(match.group(5)) * 3600 \
+ int(match.group(6)) * 60 \
- + int(match.group(7)) \
- + int(match.group(8)) / 1000.0
+ + int(match.group(7))
+ x = int(match.group(8))
+ for i in range( 0, len(match.group(8)) ):
+ x = x / 10
+ result.endTime += x
+ except AttributeError as ex:
+ print( 'Failed tro match ' + lines[1] )
+ raise ex