Skip to content

Instantly share code, notes, and snippets.

@sigio
Created February 29, 2024 23:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sigio/0f8fa1a933a8afcf67b9290c1c96cd04 to your computer and use it in GitHub Desktop.
Save sigio/0f8fa1a933a8afcf67b9290c1c96cd04 to your computer and use it in GitHub Desktop.
Fix hidive srt subs for mkv
#!/usr/bin/env python3
#
# Fix (hidive) srt subtitles for mkv player
# These subs seem to have multi-line captions with a 1ms start offset
# mpv then prints these in the wrong order (issue #7070)
# This script will merge subtitle lines correctly for mpv
#
# Public domain (created mostly with ChatGPT)
#
import argparse
def _split_timing_line(line):
    """Return ``(start, end)`` if *line* is an SRT timing line, else ``None``.

    A timing line looks like ``<start> --> <end>``.  The start must begin
    with a digit and contain a colon so that dialogue which merely contains
    the text ``-->`` is not mistaken for a timing line.
    """
    start, arrow, end = line.partition('-->')
    start = start.strip()
    end = end.strip()
    if arrow and end and start[:1].isdigit() and ':' in start:
        return start, end
    return None


def read_srt(input_file, encoding=None):
    """Parse an SRT subtitle file into a list of caption dicts.

    Args:
        input_file: Path of the .srt file to read.
        encoding: Text encoding passed to ``open()``.  ``None`` (the default)
            keeps the platform default encoding.

    Returns:
        A list of dicts, one per caption, in file order.  Each dict has:
        ``"index"`` (1-based running counter assigned here, as a string --
        the counter stored in the file is ignored), ``"start"`` and ``"end"``
        (timestamp strings, stripped) and ``"text"`` (list of stripped,
        non-empty text lines).
    """
    with open(input_file, 'r', encoding=encoding) as f:
        lines = [raw.strip() for raw in f]
    if lines:
        # A UTF-8 byte-order mark would otherwise stick to the first line.
        lines[0] = lines[0].lstrip('\ufeff').strip()

    captions = []
    current_caption = None
    for pos, line in enumerate(lines):
        if not line:
            continue

        timing = _split_timing_line(line)
        if timing is not None:
            # A new timing line closes the caption that was being collected.
            if current_caption is not None:
                captions.append(current_caption)
            start, end = timing
            current_caption = {
                "index": str(len(captions) + 1),
                "start": start,
                "end": end,
                "text": [],
            }
            continue

        # A digit-only line is the caption counter only when the timing line
        # follows directly; otherwise it is dialogue (e.g. "42") and is kept.
        if (line.isdigit() and pos + 1 < len(lines)
                and _split_timing_line(lines[pos + 1]) is not None):
            continue

        # Text seen before the first timing line has no caption to belong to.
        if current_caption is not None:
            current_caption["text"].append(line)

    # Append the last caption if there's one remaining
    if current_caption is not None:
        captions.append(current_caption)
    return captions
def write_srt(output_file, captions, encoding=None):
    """Write caption dicts to *output_file* in SRT format.

    Each caption is written as a counter line, a ``start --> end`` timing
    line, its text lines, and a terminating blank line.

    The counter written is a consecutive 1-based number in output order, not
    the caption's stored ``"index"`` value: stored indices can have gaps
    (for example after several captions were combined into one), while SRT
    counters are expected to be sequential.

    Args:
        output_file: Path of the .srt file to create or overwrite.
        captions: Iterable of dicts with ``"index"``, ``"start"``, ``"end"``
            and ``"text"`` (list of lines).  Captions whose ``"index"`` is
            missing or ``None`` are skipped.  A caption whose ``"merged"``
            value is anything other than ``False`` is written without a
            counter line.
        encoding: Text encoding passed to ``open()``.  ``None`` (the default)
            keeps the platform default encoding.
    """
    chunks = []
    counter = 0
    for caption in captions:
        # Skip captions without a valid index
        if caption.get("index") is None:
            continue
        # Write index line only for the first caption in a merged group
        if caption.get("merged", False) is False:
            counter += 1
            chunks.append("{}\n".format(counter))
        chunks.append("{} --> {}\n".format(caption["start"], caption["end"]))
        chunks.extend("{}\n".format(line) for line in caption["text"])
        chunks.append("\n")

    # Build everything first so a malformed caption cannot leave a truncated
    # file behind, then write in a single call.
    with open(output_file, 'w', encoding=encoding) as f:
        f.writelines(chunks)
def _srt_timestamp_to_ms(timestamp):
    """Convert an ``HH:MM:SS,mmm`` timestamp to total milliseconds.

    A ``.`` is accepted in place of the ``,``.  Returns ``None`` when the
    string does not have that shape, so callers can fall back to comparing
    the raw strings.
    """
    clock, sep, fraction = timestamp.strip().replace('.', ',').partition(',')
    try:
        # Unpacking raises ValueError unless there are exactly three fields.
        hours, minutes, seconds = (int(part) for part in clock.split(':'))
        millis = int(fraction) if sep else 0
    except ValueError:
        return None
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis


def merge_captions(captions, tolerance_ms=1):
    """Combine consecutive captions that belong to one multi-line caption.

    A caption is folded into the group before it when both have the same
    ``"end"`` string and their start times are identical or at most
    *tolerance_ms* milliseconds apart.  The start is compared against the
    first caption of the group, and text lines are concatenated in input
    order.  The merged caption keeps every field of the group's first caption.

    Args:
        captions: Iterable of caption dicts with ``"start"``, ``"end"`` and
            ``"text"`` keys.  Dicts lacking ``"start"`` or ``"end"`` are
            skipped.  The input dicts are not modified.
        tolerance_ms: Largest start-time difference, in milliseconds, that
            still counts as the same caption.

    Returns:
        A new list of caption dicts (shallow copies with their own ``"text"``
        list).
    """
    merged_captions = []
    group = None           # copy of the first caption of the current group
    group_start_ms = None  # its start in ms, or None if unparsable

    for caption in captions:
        # Skip captions without required keys
        if "start" not in caption or "end" not in caption:
            continue

        start_ms = _srt_timestamp_to_ms(caption["start"])
        if group is not None and caption["end"] == group["end"]:
            same_start = caption["start"] == group["start"]
            # Compare whole timestamps in ms so the offset is measured
            # correctly across second/minute/hour boundaries.
            close_start = (
                start_ms is not None
                and group_start_ms is not None
                and abs(start_ms - group_start_ms) <= tolerance_ms
            )
            if same_start or close_start:
                group["text"].extend(caption.get("text", []))
                continue

        # Start a new group on a copy so the caller's data stays untouched.
        group = dict(caption)
        group["text"] = list(caption.get("text", []))
        group_start_ms = start_ms
        merged_captions.append(group)

    return merged_captions
if __name__ == "__main__":
    # Command-line entry point: take the input and output paths, then run
    # the read -> merge -> write pipeline once.
    cli = argparse.ArgumentParser(
        description="Merge subtitles based on start times within 1 ms and same end time"
    )
    for arg_name, arg_help in (
        ("input_file", "Input .srt subtitle file"),
        ("output_file", "Output .srt subtitle file with merged captions"),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    write_srt(options.output_file, merge_captions(read_srt(options.input_file)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment