Created
February 29, 2024 23:24
-
-
Save sigio/0f8fa1a933a8afcf67b9290c1c96cd04 to your computer and use it in GitHub Desktop.
Fix hidive srt subs for mkv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
#
# Fix (hidive) srt subtitles for mkv player
# These subs seem to have multi-line captions with a 1ms start offset
# mpv then prints these in the wrong order (issue #7070)
# This script will merge subtitle lines correctly for mpv
#
# Public domain (created mostly with ChatGPT)
#
import argparse
import re
def read_srt(input_file):
    """Parse an SRT subtitle file into a list of caption dicts.

    The file is opened in text mode with the platform default encoding.
    Cue numbers found in the file are discarded; every caption is
    renumbered sequentially starting at 1.

    Args:
        input_file: Path of the .srt file to read.

    Returns:
        A list of dicts with the keys ``"index"`` (str), ``"start"`` (str),
        ``"end"`` (str) and ``"text"`` (list of stripped, non-empty lines),
        in file order.
    """
    with open(input_file, 'r') as f:
        lines = [raw.strip() for raw in f]

    captions = []
    current_caption = None
    for position, line in enumerate(lines):
        if not line:
            continue

        # A timing line starts a new caption.
        if '-->' in line:
            if current_caption is not None:
                captions.append(current_caption)
            # partition() tolerates missing spaces around the arrow and a
            # second arrow later on the line, where a two-way split on
            # ' --> ' would raise ValueError.
            start, _, end = line.partition('-->')
            current_caption = {
                "index": str(len(captions) + 1),
                "start": start.strip(),
                "end": end.strip(),
                "text": [],
            }
            continue

        # A digit-only line is a cue number only when the timing line comes
        # right after it; otherwise it is real caption text (e.g. "42") and
        # must not be dropped.
        if line.isdigit():
            has_next = position + 1 < len(lines)
            if has_next and '-->' in lines[position + 1]:
                continue

        # Anything else belongs to the caption being collected; text that
        # appears before the first timing line is ignored.
        if current_caption is not None:
            current_caption["text"].append(line)

    # Flush the caption that was still open at end of file.
    if current_caption is not None:
        captions.append(current_caption)
    return captions
def write_srt(output_file, captions):
    """Serialise caption dicts to ``output_file`` in SRT layout.

    Each caption is written as an optional cue-number line, a
    ``start --> end`` timing line, its text lines, and one blank line.

    Args:
        output_file: Path of the file to create or overwrite.
        captions: Iterable of dicts with ``"index"``, ``"start"``,
            ``"end"`` and ``"text"`` entries. A caption whose ``"index"``
            is missing or ``None`` is skipped entirely. The cue-number
            line is written only when the caption's ``"merged"`` entry is
            absent or exactly ``False``.
    """
    with open(output_file, 'w') as out:
        emit = out.write
        for entry in captions:
            # No usable cue number: drop the whole caption.
            if entry.get("index") is None:
                continue

            if entry.get("merged", False) is False:
                emit(entry["index"] + '\n')

            emit(entry["start"] + ' --> ' + entry["end"] + '\n')
            for text_line in entry["text"]:
                emit(text_line + '\n')

            # Blank separator line between cues.
            emit('\n')
def _srt_time_to_ms(stamp):
    """Convert an ``HH:MM:SS,mmm`` timestamp to milliseconds, or None if unparsable."""
    match = re.match(r'\s*(\d+):(\d+):(\d+)(?:[,.](\d{1,3}))?', stamp)
    if match is None:
        return None
    hours, minutes, seconds, fraction = match.groups()
    # Right-pad a short fraction so that ",5" means 500 ms, not 5 ms.
    millis = int((fraction or '0').ljust(3, '0'))
    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + millis


def merge_captions(captions, tolerance_ms=1):
    """Merge consecutive captions that are really one multi-line caption.

    A caption is folded into the caption that opened the current group
    when both have the same ``"end"`` string and their start times are
    either textually identical or at most ``tolerance_ms`` milliseconds
    apart. The full timestamp (hours, minutes, seconds and milliseconds)
    is compared, not a single field. Text lines of a folded caption are
    appended after the group's existing lines.

    Args:
        captions: Iterable of caption dicts as produced by ``read_srt``.
            Entries without a ``"start"`` or ``"end"`` key are skipped.
        tolerance_ms: Largest start-time difference, in milliseconds,
            that still counts as the same caption. Defaults to 1.

    Returns:
        A new list of caption dicts. The input dicts and their text lists
        are not modified.
    """
    merged_captions = []
    group = None
    group_start_ms = None

    for caption in captions:
        if "start" not in caption or "end" not in caption:
            continue

        start_ms = _srt_time_to_ms(caption["start"])

        if group is not None and group["end"] == caption["end"]:
            same_start = group["start"] == caption["start"]
            # Unparsable timestamps can only match through exact string
            # equality above.
            if (not same_start and start_ms is not None
                    and group_start_ms is not None):
                same_start = abs(group_start_ms - start_ms) <= tolerance_ms
            if same_start:
                group["text"].extend(caption.get("text", []))
                continue

        # Start a new group from a copy so the caller's data stays intact.
        group = dict(caption, text=list(caption.get("text", [])))
        group_start_ms = start_ms
        merged_captions.append(group)

    return merged_captions
if __name__ == "__main__":
    # Command-line entry point: read the input file, merge, write the output.
    cli = argparse.ArgumentParser(
        description="Merge subtitles based on start times within 1 ms and same end time"
    )
    positional_arguments = (
        ("input_file", "Input .srt subtitle file"),
        ("output_file", "Output .srt subtitle file with merged captions"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, type=str, help=argument_help)
    options = cli.parse_args()

    parsed = read_srt(options.input_file)
    write_srt(options.output_file, merge_captions(parsed))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment