Skip to content

Instantly share code, notes, and snippets.

@J-Swift
Created October 19, 2023 19:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save J-Swift/b07f5a6974dbe6500c83cccb9fe49ae2 to your computer and use it in GitHub Desktop.
Save J-Swift/b07f5a6974dbe6500c83cccb9fe49ae2 to your computer and use it in GitHub Desktop.
Fix issues with SRT files where simultaneous entries are split across multiple timestamp entries
#!/usr/bin/env ruby
# Positional command-line arguments: the input path first, the output path second.
# Either constant is nil when the corresponding argument was not supplied.
IN_FILE, OUT_FILE = ARGV.values_at(0, 1)
# Prints the one-line command synopsis to standard output.
def usage
  puts 'USAGE: fix.rb {INFILE} {OUTFILE}'
end
# Help flags are handled first so that asking for help always exits successfully,
# even though no output path was given.
if %w[--help -h].include?(IN_FILE)
  usage
  exit 0
end

# Both paths are mandatory: report the problem, show the synopsis and exit non-zero.
unless IN_FILE && OUT_FILE
  puts "ERROR: missing required param"
  puts
  usage
  exit 1
end
# A single subtitle entry: its index, start/stop timestamps and text lines.
class SubEntry
  attr_accessor :index, :starttime, :stoptime, :text

  # Starts with an empty list of text lines; the other fields stay nil until assigned.
  def initialize
    @text = []
  end

  # Renders the entry as newline-separated parts: the index, the
  # "start --> stop" line, and the text lines (themselves newline-joined).
  def to_s
    timing = "#{@starttime} --> #{@stoptime}"
    body = @text.join("\n")
    "#{@index}\n#{timing}\n#{body}"
  end
end
# Matches a line made up only of decimal digits.
INDEX_REGEX = /^\d+$/

# Returns the integer value of +line+ when it matches INDEX_REGEX, otherwise nil.
def get_index(line)
  return unless line.match?(INDEX_REGEX)

  line.to_i
end
# Timing line format, e.g.:
#   00:00:25,776 --> 00:00:29,404
TIMES_REGEX = /^(?<starttime>\d\d:\d\d:\d\d,\d+) --> (?<stoptime>\d\d:\d\d:\d\d,\d+)$/

# Extracts the two timestamps from a timing line.
#
# Returns a two-element array [starttime, stoptime] of strings.
# Raises RuntimeError ("Invalid times [...]") when the line does not match TIMES_REGEX.
def get_times(line)
  parsed = line.match(TIMES_REGEX)
  raise "Invalid times [#{line}]" unless parsed

  %i[starttime stoptime].map { |field| parsed[field] }
end
# Parses the lines of a subtitle file into a list of SubEntry objects.
#
# Parsing works on a normalized copy of the input (see #lines), so LF and CRLF
# line endings, surrounding whitespace, blank lines and a leading UTF-8
# byte-order mark are all tolerated.
class SubReader
  attr_accessor :sub_entries, :lines

  # UTF-8 byte-order mark that may prefix the first line of a file.
  BOM = "\uFEFF"

  # @param lines [Array<String>] lines as read from the input file
  # @raise [RuntimeError] if text appears before the first index line, if an
  #   index line is the last line of the input, or if get_times rejects the
  #   line that follows an index
  def initialize(lines)
    @lines = normalize(lines)
    @sub_entries = parse(@lines)
  end

  # Prints the number of parsed entries to standard output.
  def print
    puts "Raw Lines [#{@sub_entries.count}]"
  end

  private

  # Strips every line, removes a BOM from the first one and drops blank lines.
  def normalize(lines)
    cleaned = lines.map(&:strip)
    head = cleaned.first
    # Only probe for the BOM on UTF-8 strings to avoid encoding-compatibility errors.
    if head && head.encoding == Encoding::UTF_8 && head.start_with?(BOM)
      cleaned[0] = head[1..-1].strip
    end
    cleaned.reject(&:empty?)
  end

  # Walks the normalized lines: an index line opens a new entry, the line right
  # after it must be the timing line, and every other line is text belonging to
  # the most recently opened entry.
  def parse(lines)
    entries = []
    awaiting_times = nil # entry whose index was read but whose timing line was not yet seen

    lines.each do |line|
      if awaiting_times
        awaiting_times.starttime, awaiting_times.stoptime = get_times(line)
        entries << awaiting_times
        awaiting_times = nil
      elsif (idx = get_index(line))
        awaiting_times = SubEntry.new
        awaiting_times.index = idx
      elsif entries.empty?
        raise "Unexpected text before first index [#{line}]"
      else
        entries.last.text << line
      end
    end

    raise "Missing times after index [#{awaiting_times.index}]" if awaiting_times

    entries
  end
end
# Collapses runs of consecutive entries that share identical start and stop
# times into a single entry, and renumbers the resulting entries.
class SubMerger
  attr_accessor :sub_entries, :merged_entries

  # @param sub_entries [Array<SubEntry>] entries in file order; the array is
  #   copied and the entries themselves are left unmodified
  def initialize(sub_entries)
    @sub_entries = sub_entries.clone
    merge!
  end

  # Rebuilds merged_entries from sub_entries.
  #
  # The first entry of each run is copied; the text lines of the following
  # entries with the same times are appended to that copy. Numbering starts at
  # the index of the first input entry and increases by one per merged entry.
  # An empty input produces an empty result.
  #
  # @return [Array<SubEntry>] the merged entries
  def merge!
    @merged_entries = []
    next_index = @sub_entries.first&.index

    @sub_entries.each do |entry|
      target = @merged_entries.last
      if target && same_times?(target, entry)
        target.text.concat(entry.text)
      else
        fresh = entry.clone
        # clone is shallow: give the copy its own text array so that appending
        # merged lines never alters the source entry.
        fresh.text = entry.text.dup
        fresh.index = next_index
        next_index += 1
        @merged_entries << fresh
      end
    end

    @merged_entries
  end

  # Prints the number of merged entries to standard output.
  def print
    puts "Merged Lines [#{@merged_entries.count}]"
  end

  private

  # True when both entries have equal start and stop timestamps.
  def same_times?(left, right)
    left.starttime == right.starttime && left.stoptime == right.stoptime
  end
end
# Writes a list of entries to a file, each followed by a blank line.
class SubWriter
  attr_accessor :sub_entries, :filepath

  # Keeps a shallow copy of +sub_entries+ and remembers the destination path.
  def initialize(sub_entries, filepath)
    @filepath = filepath
    @sub_entries = sub_entries.clone
  end

  # Opens (creating or truncating) the destination file and writes every entry's
  # string form followed by two newlines.
  def write!
    File.open(@filepath, "w") do |out|
      @sub_entries.each { |entry| out << entry.to_s << "\n\n" }
    end
  end
end
# Script entry point: reads IN_FILE, merges entries that share the same
# timestamps, and writes the result to OUT_FILE.
#
# OUT_FILE is only written when merging reduced the number of entries;
# otherwise "No fixes required" is printed and no file is produced.
def main
  # File.readlines is used instead of IO.readlines because the path comes from
  # the command line: IO.readlines treats a path starting with "|" as a command
  # to run, whereas File.readlines always opens a regular path.
  lines = File.readlines(IN_FILE)
  reader = SubReader.new(lines)
  merger = SubMerger.new(reader.sub_entries)

  before = reader.sub_entries.count
  after = merger.merged_entries.count

  if after == before
    puts "No fixes required"
  else
    SubWriter.new(merger.merged_entries, OUT_FILE).write!
    puts "Merged [#{before - after}] entries"
  end

  puts
  puts 'Done.'
end
# Run the script.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment