Last active
December 6, 2021 05:41
-
-
Save iamevn/b21e60f6a9d572cbc5197a143f46d258 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Combine adjacent subtitle lines whose start/end timecodes (nearly) match.
import sys | |
from datetime import datetime, timedelta | |
def find_nth(string, substring, n, start=0):
    """Return the index of the nth occurrence of substring in string.

    The search uses ``str.find`` and begins at position ``start``
    (0-based). ``n`` is 1-based. Each subsequent search resumes one
    character after the previous hit, so overlapping occurrences count.

    Returns -1 when there are fewer than ``n`` occurrences.
    Raises ValueError when ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be >= 1, got {!r}'.format(n))
    found = string.find(substring, start)
    # Iterate rather than recurse so a large n cannot exhaust the stack.
    for _ in range(n - 1):
        if found == -1:
            break
        found = string.find(substring, found + 1)
    return found
def timecodes(line):
    """Extract the Start and End fields from an event line.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

    Returns (Start, End) as a tuple of strings, exactly as they appear
    between the first and third commas (no whitespace stripping).
    Raises ValueError if the line has fewer than three commas, instead
    of silently returning meaningless slices.
    """
    # Only the first three commas matter; anything after stays in one piece.
    fields = line.split(',', 3)
    if len(fields) < 4:
        raise ValueError(
            'expected at least 3 commas in event line: {!r}'.format(line))
    start, end = fields[1], fields[2]
    return (start, end)
def text(line):
    """Return the Text field of an event line.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

    Everything after the ninth comma is returned unchanged (including
    any trailing newline and any further commas). If the line holds
    fewer than nine commas, the whole line is returned as-is.
    """
    pieces = line.split(',', 9)
    if len(pieces) == 10:
        return pieces[9]
    # Fewer than nine commas: there is no Text field to isolate.
    return line
def format(line):
    """Return the line-type prefix of an event line.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

    Returns the text before the first ':' (e.g. 'Format' or 'Dialogue').
    Returns an empty string when the line contains no ':' at all, so a
    colon-less line can never be mistaken for a typed event line.
    """
    prefix, colon, _rest = line.partition(':')
    # partition() yields an empty separator when ':' is absent.
    return prefix if colon else ''
def timecode2datetime(timecode):
    """Parse an ASS timecode string into a datetime object.

    The timecode is expected in hours:minutes:seconds.fraction form.
    Four zeros are appended to the fraction before it is parsed with
    the ``%f`` directive.
    """
    # NOTE(review): presumably the fraction is two-digit centiseconds, so
    # the padded value has the six digits of microseconds — confirm.
    padded = timecode + "0000"
    layout = "%H:%M:%S.%f"
    return datetime.strptime(padded, layout)
def should_join(A, B, tolerance=timedelta(milliseconds=100)):
    """Decide whether two lines' timecodes are close enough to merge.

    A and B are both tuples containing string start/end timecodes.
    tolerance is the largest allowed difference (a timedelta) between
    the two start times and between the two end times; it defaults to
    0.10 seconds.

    Returns True when both the starts and the ends differ by no more
    than tolerance. Raises whatever timecode2datetime raises when a
    timecode cannot be parsed.
    """
    times_a = [timecode2datetime(t) for t in A]
    times_b = [timecode2datetime(t) for t in B]
    starts_match = abs(times_a[0] - times_b[0]) <= tolerance
    ends_match = abs(times_a[1] - times_b[1]) <= tolerance
    return starts_match and ends_match
def join_lines(inpath, outpath, JOINER='\\N'):
    """Merge adjacent Dialogue lines with matching timecodes.

    Reads the file at inpath (UTF-8), copies everything up to and
    including the line after the ``[Events]`` header verbatim, and then
    walks the remaining lines. A Dialogue line whose timecodes match the
    previously kept Dialogue line (per should_join) has its text appended
    to that line, separated by JOINER. Every other line is kept as-is.
    The result is written to outpath (UTF-8).

    Raises ValueError if the file has no ``[Events]`` section, rather
    than reading past end-of-file forever.
    """
    with open(inpath, encoding='utf-8') as infile:
        source = infile.readlines()

    # Locate the [Events] header; tolerate trailing whitespace or a
    # missing final newline on that line.
    events_at = None
    for index, candidate in enumerate(source):
        if candidate.strip() == '[Events]':
            events_at = index
            break
    if events_at is None:
        raise ValueError('no [Events] section found in {!r}'.format(inpath))

    # Header, the [Events] line and the line right after it (the
    # "Format: ..." declaration) are copied through untouched.
    body_start = events_at + 2
    lines = source[:body_start]

    for nextline in source[body_start:]:
        previous = lines[-1]
        # Only merge Dialogue into Dialogue: this avoids parsing
        # timecodes out of blank lines, comments or the Format line.
        if format(nextline) == 'Dialogue' \
                and format(previous) == 'Dialogue' \
                and should_join(timecodes(nextline), timecodes(previous)):
            # Drop the previous line's newline (if any) before appending.
            lines[-1] = previous.rstrip('\n') + JOINER + text(nextline)
        else:
            lines.append(nextline)

    with open(outpath, 'w', encoding='utf-8') as outfile:
        outfile.writelines(lines)
if __name__ == '__main__':
    # Exactly two positional arguments are required: input and output path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        sys.exit('Usage: {} infile.ass outfile.ass'.format(sys.argv[0]))
    source_path, target_path = cli_args
    join_lines(source_path, target_path, '\\N')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment