# iamevn/combine_lines.py

## combine_lines.py
#!/usr/bin/env python3

# Combine adjacent ASS Dialogue lines whose start and end timecodes match.
import sys
from datetime import datetime, timedelta

def find_nth(string, substring, n, start=0):
    """Return the index of the nth occurrence of substring in string.

    The search begins at index start (0-based) and uses str.find.  Each
    later occurrence is searched for one character past the previous
    match, so overlapping matches are counted.

    n is 1-based.  Returns -1 when n is less than 1 or when fewer than n
    occurrences exist.
    """
    if n < 1:
        return -1
    position = string.find(substring, start)
    # Iterate instead of recursing so a large n cannot hit the recursion limit.
    for _ in range(n - 1):
        if position == -1:
            break
        position = string.find(substring, position + 1)
    return position

def timecodes(line):
    """Return the (Start, End) fields of an event line as a tuple of strings.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
    Start and End are the text between the first and second, and the
    second and third commas; they are returned exactly as written.

    Raises ValueError when line has fewer than three commas, instead of
    silently slicing with a -1 index and returning unrelated text.
    """
    # Only the first three commas matter; Text may contain commas of its own.
    fields = line.split(',', 3)
    if len(fields) < 4:
        raise ValueError(
            'expected at least 3 commas in event line: {!r}'.format(line))
    return (fields[1], fields[2])

def text(line):
    """Return the Text field of an event line as a string.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
    Text is everything after the ninth comma, including any further
    commas and the trailing newline if line has one.

    Raises ValueError when line has fewer than nine commas, instead of
    silently returning the whole line.
    """
    # Split at most nine times so commas inside Text stay untouched.
    fields = line.split(',', 9)
    if len(fields) < 10:
        raise ValueError(
            'expected at least 9 commas in event line: {!r}'.format(line))
    return fields[9]

def format(line):
    """Return the descriptor of a line: the text before its first colon.

    line is a string in the form
    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
    for which 'Format' is returned.

    Returns '' when line contains no colon.  Slicing with the -1 that
    str.find reports would instead drop the last character, so a
    colon-less line such as 'DialogueX' would be reported as 'Dialogue'.

    NOTE: this name shadows the built-in format(); it is kept unchanged
    because other code in this file calls it by this name.
    """
    descriptor, colon, _ = line.partition(':')
    return descriptor if colon else ''

def timecode2datetime(timecode):
    """Convert an ass timecode such as '0:00:01.50' to a datetime object.

    The timecode is parsed as hours:minutes:seconds.fraction.  strptime's
    %f directive accepts one to six fractional digits and zero-pads on the
    right, so no manual padding is needed and timecodes with more than two
    fractional digits parse as well.  Surrounding whitespace is ignored.

    Only the time part carries meaning; the date part is whatever
    strptime fills in by default.  Raises ValueError when timecode does
    not match the expected layout.
    """
    return datetime.strptime(timecode.strip(), "%H:%M:%S.%f")

def should_join(A, B):
    """Decide whether two (start, end) timecode pairs are close enough to merge.

    A and B each hold a start and an end timecode as strings.  Returns
    True when the two start times differ by at most 0.10 seconds and the
    two end times differ by at most 0.10 seconds.
    """
    first = list(map(timecode2datetime, A))
    second = list(map(timecode2datetime, B))
    # Fixed tolerance of 0.10 sec, expressed in microseconds.
    tolerance = timedelta(microseconds=100000)
    if abs(first[0] - second[0]) > tolerance:
        return False
    return abs(first[1] - second[1]) <= tolerance


def join_lines(inpath, outpath, JOINER='\\N'):
    """Merge adjacent Dialogue lines with matching timecodes and write the result.

    inpath is read as UTF-8.  Every line up to and including the
    [Events] header is copied unchanged.  After that header, a Dialogue
    line is folded into the previous line when that line is also a
    Dialogue line and should_join accepts their timecodes.  Folding
    replaces the previous line's trailing newline with JOINER followed by
    the current line's Text field.  Every other line is copied unchanged.
    The result is written to outpath as UTF-8.

    A file with no [Events] header is copied unchanged.
    """
    merged = []
    in_events = False
    with open(inpath, encoding='utf-8') as infile:
        # Iterating the file stops at EOF, so a missing [Events] header
        # cannot loop forever on the empty strings readline() returns there.
        for line in infile:
            if not in_events:
                merged.append(line)
                in_events = line.strip() == '[Events]'
                continue
            previous = merged[-1]
            # The previous line must be a Dialogue line too; otherwise text
            # would be folded into a non-Dialogue line, or timecodes would
            # be parsed from a line that has none.
            if (format(line) == 'Dialogue'
                    and format(previous) == 'Dialogue'
                    and should_join(timecodes(line), timecodes(previous))):
                merged[-1] = previous.rstrip('\n') + JOINER + text(line)
            else:
                merged.append(line)

    with open(outpath, 'w', encoding='utf-8') as outfile:
        outfile.writelines(merged)

if __name__ == '__main__':
    # Exactly two arguments are expected: the input and the output path.
    arguments = sys.argv[1:]
    if len(arguments) != 2:
        sys.exit('Usage: {} infile.ass outfile.ass'.format(sys.argv[0]))

    source_path, target_path = arguments
    join_lines(source_path, target_path, '\\N')
	#!/usr/bin/env python3

	# combine adjacent lines with the same timecodes
	import sys
	from datetime import datetime, timedelta

	def find_nth(string, substring, n, start=0):
	    """Return the index of the nth occurrence of substring in string.

	    The search begins at index start (0-based) and uses str.find.  Each
	    later occurrence is searched for one character past the previous
	    match, so overlapping matches are counted.

	    n is 1-based.  Returns -1 when n is less than 1 or when fewer than n
	    occurrences exist.
	    """
	    if n < 1:
	        return -1
	    position = string.find(substring, start)
	    # Iterate instead of recursing so a large n cannot hit the recursion limit.
	    for _ in range(n - 1):
	        if position == -1:
	            break
	        position = string.find(substring, position + 1)
	    return position

	def timecodes(line):
	    """Return the (Start, End) fields of an event line as a tuple of strings.

	    line is a string in the form
	    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
	    Start and End are the text between the first and second, and the
	    second and third commas; they are returned exactly as written.

	    Raises ValueError when line has fewer than three commas, instead of
	    silently slicing with a -1 index and returning unrelated text.
	    """
	    # Only the first three commas matter; Text may contain commas of its own.
	    fields = line.split(',', 3)
	    if len(fields) < 4:
	        raise ValueError(
	            'expected at least 3 commas in event line: {!r}'.format(line))
	    return (fields[1], fields[2])

	def text(line):
	    """Return the Text field of an event line as a string.

	    line is a string in the form
	    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
	    Text is everything after the ninth comma, including any further
	    commas and the trailing newline if line has one.

	    Raises ValueError when line has fewer than nine commas, instead of
	    silently returning the whole line.
	    """
	    # Split at most nine times so commas inside Text stay untouched.
	    fields = line.split(',', 9)
	    if len(fields) < 10:
	        raise ValueError(
	            'expected at least 9 commas in event line: {!r}'.format(line))
	    return fields[9]

	def format(line):
	    """Return the descriptor of a line: the text before its first colon.

	    line is a string in the form
	    Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
	    for which 'Format' is returned.

	    Returns '' when line contains no colon.  Slicing with the -1 that
	    str.find reports would instead drop the last character, so a
	    colon-less line such as 'DialogueX' would be reported as 'Dialogue'.

	    NOTE: this name shadows the built-in format(); it is kept unchanged
	    because other code in this file calls it by this name.
	    """
	    descriptor, colon, _ = line.partition(':')
	    return descriptor if colon else ''

	def timecode2datetime(timecode):
	    """Convert an ass timecode such as '0:00:01.50' to a datetime object.

	    The timecode is parsed as hours:minutes:seconds.fraction.  strptime's
	    %f directive accepts one to six fractional digits and zero-pads on the
	    right, so no manual padding is needed and timecodes with more than two
	    fractional digits parse as well.  Surrounding whitespace is ignored.

	    Only the time part carries meaning; the date part is whatever
	    strptime fills in by default.  Raises ValueError when timecode does
	    not match the expected layout.
	    """
	    return datetime.strptime(timecode.strip(), "%H:%M:%S.%f")

	def should_join(A, B):
	    """Decide whether two (start, end) timecode pairs are close enough to merge.

	    A and B each hold a start and an end timecode as strings.  Returns
	    True when the two start times differ by at most 0.10 seconds and the
	    two end times differ by at most 0.10 seconds.
	    """
	    first = [timecode2datetime(stamp) for stamp in A]
	    second = [timecode2datetime(stamp) for stamp in B]
	    # Fixed tolerance of 0.10 sec, expressed in microseconds.
	    tolerance = timedelta(microseconds=100000)
	    return (abs(first[0] - second[0]) <= tolerance
	            and abs(first[1] - second[1]) <= tolerance)


	def join_lines(inpath, outpath, JOINER='\\N'):
	    """Merge adjacent Dialogue lines with matching timecodes and write the result.

	    inpath is read as UTF-8.  Every line up to and including the
	    [Events] header is copied unchanged.  After that header, a Dialogue
	    line is folded into the previous line when that line is also a
	    Dialogue line and should_join accepts their timecodes.  Folding
	    replaces the previous line's trailing newline with JOINER followed by
	    the current line's Text field.  Every other line is copied unchanged.
	    The result is written to outpath as UTF-8.

	    A file with no [Events] header is copied unchanged.
	    """
	    merged = []
	    in_events = False
	    with open(inpath, encoding='utf-8') as infile:
	        # Iterating the file stops at EOF, so a missing [Events] header
	        # cannot loop forever on the empty strings readline() returns there.
	        for line in infile:
	            if not in_events:
	                merged.append(line)
	                in_events = line.strip() == '[Events]'
	                continue
	            previous = merged[-1]
	            # The previous line must be a Dialogue line too; otherwise text
	            # would be folded into a non-Dialogue line, or timecodes would
	            # be parsed from a line that has none.
	            if (format(line) == 'Dialogue'
	                    and format(previous) == 'Dialogue'
	                    and should_join(timecodes(line), timecodes(previous))):
	                merged[-1] = previous.rstrip('\n') + JOINER + text(line)
	            else:
	                merged.append(line)

	    with open(outpath, 'w', encoding='utf-8') as outfile:
	        outfile.writelines(merged)

	if __name__ == '__main__':
	    # Exactly two arguments are expected: the input and the output path.
	    if len(sys.argv) != 3:
	        sys.exit('Usage: {} infile.ass outfile.ass'.format(sys.argv[0]))

	    join_lines(sys.argv[1], sys.argv[2], '\\N')