Created
October 19, 2023 19:02
-
-
Save J-Swift/b07f5a6974dbe6500c83cccb9fe49ae2 to your computer and use it in GitHub Desktop.
Fix issues with SRT files where simultaneous entries are split across multiple timestamp entries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby

# Positional command-line arguments: the SRT file to read and the path the
# corrected copy is written to. Each is nil when the argument was omitted.
IN_FILE, OUT_FILE = ARGV.values_at(0, 1)
# Prints the one-line command synopsis to standard output.
def usage
  puts "USAGE: fix.rb {INFILE} {OUTFILE}"
end
# A help flag in the first position prints the synopsis and exits successfully.
if %w[--help -h].include?(IN_FILE)
  usage
  exit 0
end

# Both paths are mandatory: report the problem, show the synopsis, exit with 1.
if [IN_FILE, OUT_FILE].any?(&:nil?)
  puts "ERROR: missing required param"
  puts
  usage
  exit 1
end
# One subtitle cue: its sequence number, start/stop timestamps and the lines
# of text attached to it.
class SubEntry
  attr_accessor :index, :starttime, :stoptime, :text

  # A new entry starts with an empty list of text lines; the other fields
  # stay unset until assigned.
  def initialize
    @text = []
  end

  # Renders the entry as index line, timing line and text lines, separated
  # by newlines, with no newline after the last text line.
  def to_s
    timing = "#{starttime} --> #{stoptime}"
    body = text.join("\n")
    "#{index}\n#{timing}\n#{body}"
  end
end
# Matches a string made up solely of decimal digits. Anchored with \A and \z
# (whole string) rather than ^ and $ (any line), so a digit-only line buried
# inside a multi-line string is not mistaken for an index.
INDEX_REGEX = /\A\d+\z/

# Extracts the subtitle sequence number from a line.
#
# Surrounding whitespace is ignored, so lines that still carry their "\n" or
# "\r\n" terminator are recognised.
#
# @param line [String] a single line of the subtitle file
# @return [Integer, nil] the sequence number, or nil when the line is not a
#   digit-only line
def get_index(line)
  candidate = line.strip
  candidate.to_i if candidate.match?(INDEX_REGEX)
end
# Timing line of an entry, e.g.:
#   00:00:25,776 --> 00:00:29,404
# Anchored with \A and \z so the whole string must be the timing line.
TIMES_REGEX = /\A(?<starttime>\d\d:\d\d:\d\d,\d+) --> (?<stoptime>\d\d:\d\d:\d\d,\d+)\z/

# Extracts the start and stop timestamps from a timing line.
#
# Surrounding whitespace is ignored, so lines that still carry their "\n" or
# "\r\n" terminator are accepted.
#
# @param line [String] a single line of the subtitle file
# @return [Array(String, String)] start and stop timestamps, in that order
# @raise [RuntimeError] when the line is not a timing line
def get_times(line)
  matches = line.strip.match(TIMES_REGEX)
  raise "Invalid times [#{line}]" unless matches

  [matches[:starttime], matches[:stoptime]]
end
# Parses the lines of an SRT file into SubEntry objects.
#
# Lines are cleaned first (a leading UTF-8 byte-order mark is dropped from
# the first line, every line is stripped, blank lines are discarded) and the
# entries are built from the cleaned lines, so files with "\r\n" line
# endings or stray whitespace parse the same as clean ones.
class SubReader
  # Byte sequence of a UTF-8 byte-order mark, compared as raw bytes so the
  # check does not depend on the encoding the file was read with.
  UTF8_BOM = "\xEF\xBB\xBF".b.freeze

  attr_accessor :sub_entries, :lines

  # @param lines [Array<String>] raw lines of the subtitle file
  # @raise [RuntimeError] when text appears before the first index line, when
  #   an index line is the last line, or when get_times rejects a timing line
  def initialize(lines)
    @lines = clean(lines)
    @sub_entries = parse(@lines)
  end

  # Prints the number of parsed entries to standard output.
  def print
    puts "Raw Lines [#{@sub_entries.count}]"
  end

  private

  # Returns the stripped, non-blank lines, with a BOM removed from the first.
  def clean(raw_lines)
    stripped = raw_lines.each_with_index.map do |line, position|
      (position.zero? ? strip_bom(line) : line).strip
    end
    stripped.reject(&:empty?)
  end

  # Returns +line+ without a leading UTF-8 byte-order mark, if it has one.
  def strip_bom(line)
    return line unless line.b.start_with?(UTF8_BOM)

    line.byteslice(UTF8_BOM.bytesize..-1)
  end

  # Builds the entries from cleaned lines: a digit-only line opens an entry,
  # the line right after it must be the timing line, and every following
  # line up to the next digit-only line is text of that entry.
  def parse(cleaned_lines)
    entries = []
    pending = nil # entry whose index was read but whose timing line was not

    cleaned_lines.each do |line|
      if pending
        pending.starttime, pending.stoptime = get_times(line)
        entries << pending
        pending = nil
      elsif (idx = get_index(line))
        pending = SubEntry.new
        pending.index = idx
      elsif entries.empty?
        raise "Invalid subtitle text before first entry [#{line}]"
      else
        entries.last.text << line
      end
    end

    raise "Missing times for entry [#{pending.index}]" if pending

    entries
  end
end
# Collapses runs of consecutive entries that share the same start and stop
# timestamps into a single entry carrying all of their text lines.
class SubMerger
  attr_accessor :sub_entries, :merged_entries

  # @param sub_entries [Array] entries responding to index, starttime,
  #   stoptime and text; the array and its entries are left unmodified
  def initialize(sub_entries)
    @sub_entries = sub_entries.clone
    merge!
  end

  # Rebuilds merged_entries from sub_entries.
  #
  # Each run of adjacent entries with identical timestamps becomes one copy
  # of the run's first entry whose text is the concatenation of the run's
  # text lines. Merged entries are renumbered consecutively, starting from
  # the index of the first input entry. An empty input yields an empty result.
  #
  # @return [Array] the merged entries
  def merge!
    # nil only for empty input, in which case no entry is renumbered.
    first_index = @sub_entries.first&.index
    runs = @sub_entries.chunk_while { |left, right| same_times?(left, right) }

    @merged_entries = runs.each_with_index.map do |run, offset|
      merged = run.first.clone
      merged.index = first_index + offset
      # flat_map builds a fresh array, so the input entries keep their own
      # text instead of sharing (and growing) one array with the copy.
      merged.text = run.flat_map(&:text)
      merged
    end
  end

  # Prints the number of merged entries to standard output.
  def print
    puts "Merged Lines [#{@merged_entries.count}]"
  end

  private

  # True when both entries cover exactly the same time span.
  def same_times?(left, right)
    left.starttime == right.starttime && left.stoptime == right.stoptime
  end
end
# Writes a list of entries to a file, each followed by a blank line.
class SubWriter
  attr_accessor :sub_entries, :filepath

  # Keeps a shallow copy of the entry list together with the target path.
  def initialize(sub_entries, filepath)
    @filepath = filepath
    @sub_entries = sub_entries.clone
  end

  # Replaces the file at filepath with the string form of every entry, in
  # order, each followed by two newlines.
  def write!
    File.open(@filepath, "w") do |out|
      @sub_entries.each { |entry| out << entry.to_s << "\n\n" }
    end
  end
end
# Reads the input file, merges entries through SubReader and SubMerger and,
# only when the merge reduced the number of entries, writes the result to
# the output path. Progress is reported on standard output.
def main
  reader = SubReader.new(IO.readlines(IN_FILE))
  merger = SubMerger.new(reader.sub_entries)
  writer = SubWriter.new(merger.merged_entries, OUT_FILE)

  merged_count = reader.sub_entries.count - merger.merged_entries.count
  if merged_count.zero?
    puts "No fixes required"
  else
    writer.write!
    puts "Merged [#{merged_count}] entries"
  end

  puts
  puts 'Done.'
end

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment