akostadinov/mux.rb

## mux.rb
#!/bin/env ruby

# License: MIT

require 'shellwords'
require 'tempfile'

# Builds and runs an ffmpeg command that muxes the media files, external
# subtitles and optional chapters.txt found in one directory into a single
# output file named after that directory.
class MuxerCLI
  attr_reader :dir

  # Language name that must appear in a subtitle file name => language code.
  # https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
  LANGS = {
    "English" => "eng",
    "Bulgarian" => "bul",
    "Chinese" => "zho",
    "German" => "deu",
    "Italian" => "ita",
    "Spanish" => "spa",
    "Portuguese" => "por",
    "Japanese" => "jpn",
    "French" => "fra",
  }.freeze

  MEDIA_EXT = %w[avi mp4 mpv webm mkv].freeze
  SUBTITLE_EXT = %w[srt vtt].freeze

  # Name of the optional chapters description file inside the directory.
  CHAPTERS_FILE = "chapters.txt".freeze

  # One chapters.txt line: "H:MM:SS Title"; the hour may have several digits.
  CHAPTER_LINE = /^(?<hrs>\d+):(?<mins>\d{2}):(?<secs>\d{2}) (?<title>.*)$/.freeze

  # @param dir [String] directory holding the input files; a trailing "/" is dropped
  # @raise [RuntimeError] when dir is missing or is not an existing directory
  def initialize(dir)
    # Validate before touching the string so a missing argument gives the
    # intended message. Dir.exist? is used because Dir.exists? is gone in Ruby 3.2.
    raise "specify valid dir" unless dir && Dir.exist?(dir)

    @dir = dir.chomp("/").freeze
  end

  # @return [String, nil] the text after the last "." of path (the whole path
  #   when it contains no dot, nil when it ends with a dot)
  def file_ext(path)
    path.slice(/[^.]+$/).freeze
  end

  # @return [String] the directory name with the chosen container extension appended
  def output_path
    @output_name ||= "#{dir}.#{output_ext}"
  end

  # @return [String] "mp4" or "webm" when every media file has that extension, else "mkv"
  def output_ext
    extensions = media_files.map { file_ext(_1) }
    return "mp4" if extensions.all?("mp4")
    return "webm" if extensions.all?("webm")

    "mkv"
  end

  # @return [Array<String>] names of the media files found in dir
  def media_files
    discover_files unless @media_files
    @media_files
  end

  # @return [Array<String>] names of the subtitle files found in dir
  def subtitle_files
    discover_files unless @subtitle_files
    @subtitle_files
  end

  # @return [Array<String>] empty, or holding the single chapters file name
  def chapter_files
    discover_files unless @chapter_files
    @chapter_files
  end

  # Sorts every entry of dir into media, subtitle and chapter files.
  #
  # @return [nil]
  # @raise [RuntimeError] on a sub-directory, an unknown extension, or when the
  #   number of media files is not 1 or 2
  def discover_files
    return if @media_files && @subtitle_files

    media = []
    subtitles = []
    chapters = []

    Dir.children(dir).each do |file|
      raise "Don't know what to do with directories" if Dir.exist?(full_path(file))

      if file == CHAPTERS_FILE
        chapters << file
        next
      end

      ext = file_ext(file)
      if MEDIA_EXT.include?(ext)
        media << file
      elsif SUBTITLE_EXT.include?(ext)
        subtitles << file
      else
        raise "unknown file extension for #{file}"
      end
    end

    raise "one or two media files expected" unless (1..2).cover?(media.size)
    raise "maximum one chapters file supported" if chapters.size > 1

    @chapter_files = chapters
    @media_files = media
    @subtitle_files = subtitles
    nil
  end

  # @return [String] file joined onto dir
  def full_path(file)
    File.join(dir, file)
  end

  # @return [String] full path of file, escaped for use in a shell command line
  def escaped_path(file)
    Shellwords.escape(full_path(file))
  end

  # Asks ffprobe for the stream count of every media file and adds them up.
  #
  # @return [Integer] total number of streams across all media files
  # @raise [RuntimeError] when ffprobe's output is not a number
  def media_streams
    return @media_streams if @media_streams

    @media_streams = media_files.sum do |media|
      output = `ffprobe -show_entries format=nb_streams -v 0 -of compact=p=0:nk=1 #{escaped_path(media)}`
      count = Integer(output.strip, exception: false)
      raise "can't determine number of streams in #{media}" unless count

      count
    end
  end

  # @return [String] the complete ffmpeg command line
  def mux_command
    "ffmpeg #{file_options} #{subtitle_options} #{Shellwords.escape output_path}"
  end

  # just include all media as input
  # TODO: handle metadata copying more reliably, so far these are copied from first file
  def file_options
    sources = media_files + subtitle_files
    inputs = sources.map { "-i #{escaped_path(_1)}" }.join(" ")
    maps = sources.each_index.map { "-map #{_1}" }.join(" ")
    "#{inputs} #{chapter_options} -c:v copy -c:a copy -c:s webvtt #{maps}"
  end

  # Language/title metadata for each subtitle stream. Subtitle streams are
  # numbered right after all the streams of the media files.
  #
  # @return [String] "-metadata:s:N ..." options, empty when there are no subtitles
  def subtitle_options
    first_stream = media_streams

    subtitle_files.each_with_index.map do |subtitle, index|
      stream = first_stream + index
      name = lang(subtitle)
      "-metadata:s:#{stream} language=#{LANGS.fetch(name)} -metadata:s:#{stream} handler_name=#{name} -metadata:s:#{stream} title=#{name}"
    end.join(" ")
  end

  # TODO: copy chapters from proper media file, not only look for external file
  #
  # @return [String, nil] options adding the chapters metadata file as one more
  #   input (its index follows the media and subtitle inputs), nil without chapters.txt
  def chapter_options
    chapters_path = encoded_chapters
    return unless chapters_path

    "-i #{Shellwords.escape chapters_path} -map_chapters #{(media_files + subtitle_files).size}"
  end

  # Converts chapters.txt into a temporary FFMETADATA file (created once).
  # The file stays on disk until #cleanup is called; #run does that.
  #
  # @return [String, nil] path of the metadata file, nil when there is no chapters.txt
  # @raise [RuntimeError] on a non-blank line that is not "H:MM:SS Title"
  def encoded_chapters
    return @encoded_chapters if @encoded_chapters
    return if chapter_files.empty?

    chapters = parse_chapters(full_path(chapter_files.first))
    @encoded_chapters = write_ffmetadata(chapters)
  end

  # @return [String] the language code for the language named in file
  def lang_code(file)
    LANGS[lang(file)]
  end

  # @return [String] the single LANGS key contained in file
  # @raise [RuntimeError] when file names no known language or more than one
  def lang(file)
    matches = LANGS.keys.select { |name| file.include?(name) }
    raise "can't find language for #{file}" if matches.empty?
    raise "ambiguous file, found multiple languages: #{matches.join(" ")}" if matches.size > 1

    matches.first
  end

  # Prints and executes the ffmpeg command, then removes the temporary
  # chapters file. An existing output file is not checked here (ffmpeg will ask).
  #
  # @return [Boolean, nil] the result of Kernel#system
  def run
    command = mux_command
    warn "Running: #{command}"

    system command
  ensure
    cleanup
  end

  # Deletes the temporary chapters metadata file, if one was written.
  #
  # @return [nil]
  def cleanup
    return unless @encoded_chapters

    File.delete(@encoded_chapters) if File.exist?(@encoded_chapters)
    @encoded_chapters = nil
  end

  private

  # Reads the chapter lines of path, skipping blank ones, into
  # { title:, start: } hashes with start in milliseconds.
  def parse_chapters(path)
    File.readlines(path).filter_map do |raw|
      line = raw.strip
      next if line.empty?

      m = CHAPTER_LINE.match(line)
      raise "bad chapters format at #{line}" unless m

      seconds = (((m[:hrs].to_i * 60) + m[:mins].to_i) * 60) + m[:secs].to_i
      { title: m[:title], start: seconds * 1000 }
    end
  end

  # Writes chapters as FFMETADATA [CHAPTER] sections and returns the file path.
  def write_ffmetadata(chapters)
    # The final entry is never written as a chapter of its own: it only supplies
    # the END of the one before it. Bumping it by 1 cancels the "- 1" below.
    chapters.last[:start] += 1 unless chapters.empty?

    dst = Tempfile.create(%w[CHAPTERS .txt])
    begin
      dst << ";FFMETADATA1\n\n"
      chapters.each_cons(2) do |chapter, following|
        dst << [
          "[CHAPTER]",
          "TIMEBASE=1/1000",
          "START=#{chapter[:start]}",
          "END=#{following[:start] - 1}",
          "title=#{chapter[:title]}",
          "",
          "",
        ].join("\n")
      end
    ensure
      dst.close
    end

    dst.path
  end
end

# Run the muxer only when this file is executed as a script (not when it is
# required), and report a failed or unstartable ffmpeg through the exit status:
# #run returns the result of Kernel#system, which is true only on success.
if __FILE__ == $PROGRAM_NAME
  exit(1) unless MuxerCLI.new(ARGV[0]).run
end

## muxing files from directory with ffmpeg.md

      
    Raw
  

              muxing files from directory with ffmpeg.md
            
          
    USE

Run by

$ ./mux.rb "Super Cool Video"

The files within the directory can have any names, except that each subtitle file name must contain its language, capitalized (e.g. English).
The generated file will be an mp4, webm or mkv, whichever container can hold the inputs (I haven't tested mp4; let me know if it fails with vtt subtitles there).
Check https://gist.github.com/akostadinov/5028a4f2d26eb79e80b6ed2b11ab88ff for chapters.txt example.
e.g. you may have a directory with files like this:
ls "Super Cool Video"
chapters.txt # optional
video file name.webm
audio file.mp4
subtitle file English optional.srt
another optional subtitle file but with language Bulgarian.vtt

The output file will be Super Cool Video.mkv, because the webm video and the mp4 audio can't both fit in an mp4 or a webm container.
LIMITATIONS


only one audio and one video stream in total of 1 or 2 input media files
only external subtitles are handled; they will not be mixed with subtitles already present in the media files
if you don't supply chapters, they will be copied from the first input file, which can be either the audio or the video file; we don't guarantee which one comes first in the directory order
global metadata will be copied from the first media file, but again we can't guarantee which one will be first in the directory order
if chapters.txt is provided, chapters from first media file will be ignored
	#!/bin/env ruby

	# License: MIT

	require 'shellwords'
	require 'tempfile'

	# Builds and runs an ffmpeg command that muxes the media files, external
	# subtitles and optional chapters.txt found in one directory into a single
	# output file named after that directory.
	class MuxerCLI
	  attr_reader :dir

	  # Language name that must appear in a subtitle file name => language code.
	  # https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
	  LANGS = {
	    "English" => "eng",
	    "Bulgarian" => "bul",
	    "Chinese" => "zho",
	    "German" => "deu",
	    "Italian" => "ita",
	    "Spanish" => "spa",
	    "Portuguese" => "por",
	    "Japanese" => "jpn",
	    "French" => "fra",
	  }.freeze

	  MEDIA_EXT = %w[avi mp4 mpv webm mkv].freeze
	  SUBTITLE_EXT = %w[srt vtt].freeze

	  # Name of the optional chapters description file inside the directory.
	  CHAPTERS_FILE = "chapters.txt".freeze

	  # One chapters.txt line: "H:MM:SS Title"; the hour may have several digits.
	  CHAPTER_LINE = /^(?<hrs>\d+):(?<mins>\d{2}):(?<secs>\d{2}) (?<title>.*)$/.freeze

	  # @param dir [String] directory holding the input files; a trailing "/" is dropped
	  # @raise [RuntimeError] when dir is missing or is not an existing directory
	  def initialize(dir)
	    # Validate before touching the string so a missing argument gives the
	    # intended message. Dir.exist? is used because Dir.exists? is gone in Ruby 3.2.
	    raise "specify valid dir" unless dir && Dir.exist?(dir)

	    @dir = dir.chomp("/").freeze
	  end

	  # @return [String, nil] the text after the last "." of path (the whole path
	  #   when it contains no dot, nil when it ends with a dot)
	  def file_ext(path)
	    path.slice(/[^.]+$/).freeze
	  end

	  # @return [String] the directory name with the chosen container extension appended
	  def output_path
	    @output_name ||= "#{dir}.#{output_ext}"
	  end

	  # @return [String] "mp4" or "webm" when every media file has that extension, else "mkv"
	  def output_ext
	    extensions = media_files.map { file_ext(_1) }
	    return "mp4" if extensions.all?("mp4")
	    return "webm" if extensions.all?("webm")

	    "mkv"
	  end

	  # @return [Array<String>] names of the media files found in dir
	  def media_files
	    discover_files unless @media_files
	    @media_files
	  end

	  # @return [Array<String>] names of the subtitle files found in dir
	  def subtitle_files
	    discover_files unless @subtitle_files
	    @subtitle_files
	  end

	  # @return [Array<String>] empty, or holding the single chapters file name
	  def chapter_files
	    discover_files unless @chapter_files
	    @chapter_files
	  end

	  # Sorts every entry of dir into media, subtitle and chapter files.
	  #
	  # @return [nil]
	  # @raise [RuntimeError] on a sub-directory, an unknown extension, or when the
	  #   number of media files is not 1 or 2
	  def discover_files
	    return if @media_files && @subtitle_files

	    media = []
	    subtitles = []
	    chapters = []

	    Dir.children(dir).each do |file|
	      raise "Don't know what to do with directories" if Dir.exist?(full_path(file))

	      if file == CHAPTERS_FILE
	        chapters << file
	        next
	      end

	      ext = file_ext(file)
	      if MEDIA_EXT.include?(ext)
	        media << file
	      elsif SUBTITLE_EXT.include?(ext)
	        subtitles << file
	      else
	        raise "unknown file extension for #{file}"
	      end
	    end

	    raise "one or two media files expected" unless (1..2).cover?(media.size)
	    raise "maximum one chapters file supported" if chapters.size > 1

	    @chapter_files = chapters
	    @media_files = media
	    @subtitle_files = subtitles
	    nil
	  end

	  # @return [String] file joined onto dir
	  def full_path(file)
	    File.join(dir, file)
	  end

	  # @return [String] full path of file, escaped for use in a shell command line
	  def escaped_path(file)
	    Shellwords.escape(full_path(file))
	  end

	  # Asks ffprobe for the stream count of every media file and adds them up.
	  #
	  # @return [Integer] total number of streams across all media files
	  # @raise [RuntimeError] when ffprobe's output is not a number
	  def media_streams
	    return @media_streams if @media_streams

	    @media_streams = media_files.sum do |media|
	      output = `ffprobe -show_entries format=nb_streams -v 0 -of compact=p=0:nk=1 #{escaped_path(media)}`
	      count = Integer(output.strip, exception: false)
	      raise "can't determine number of streams in #{media}" unless count

	      count
	    end
	  end

	  # @return [String] the complete ffmpeg command line
	  def mux_command
	    "ffmpeg #{file_options} #{subtitle_options} #{Shellwords.escape output_path}"
	  end

	  # just include all media as input
	  # TODO: handle metadata copying more reliably, so far these are copied from first file
	  def file_options
	    sources = media_files + subtitle_files
	    inputs = sources.map { "-i #{escaped_path(_1)}" }.join(" ")
	    maps = sources.each_index.map { "-map #{_1}" }.join(" ")
	    "#{inputs} #{chapter_options} -c:v copy -c:a copy -c:s webvtt #{maps}"
	  end

	  # Language/title metadata for each subtitle stream. Subtitle streams are
	  # numbered right after all the streams of the media files.
	  #
	  # @return [String] "-metadata:s:N ..." options, empty when there are no subtitles
	  def subtitle_options
	    first_stream = media_streams

	    subtitle_files.each_with_index.map do |subtitle, index|
	      stream = first_stream + index
	      name = lang(subtitle)
	      "-metadata:s:#{stream} language=#{LANGS.fetch(name)} -metadata:s:#{stream} handler_name=#{name} -metadata:s:#{stream} title=#{name}"
	    end.join(" ")
	  end

	  # TODO: copy chapters from proper media file, not only look for external file
	  #
	  # @return [String, nil] options adding the chapters metadata file as one more
	  #   input (its index follows the media and subtitle inputs), nil without chapters.txt
	  def chapter_options
	    chapters_path = encoded_chapters
	    return unless chapters_path

	    "-i #{Shellwords.escape chapters_path} -map_chapters #{(media_files + subtitle_files).size}"
	  end

	  # Converts chapters.txt into a temporary FFMETADATA file (created once).
	  # The file stays on disk until #cleanup is called; #run does that.
	  #
	  # @return [String, nil] path of the metadata file, nil when there is no chapters.txt
	  # @raise [RuntimeError] on a non-blank line that is not "H:MM:SS Title"
	  def encoded_chapters
	    return @encoded_chapters if @encoded_chapters
	    return if chapter_files.empty?

	    chapters = parse_chapters(full_path(chapter_files.first))
	    @encoded_chapters = write_ffmetadata(chapters)
	  end

	  # @return [String] the language code for the language named in file
	  def lang_code(file)
	    LANGS[lang(file)]
	  end

	  # @return [String] the single LANGS key contained in file
	  # @raise [RuntimeError] when file names no known language or more than one
	  def lang(file)
	    matches = LANGS.keys.select { |name| file.include?(name) }
	    raise "can't find language for #{file}" if matches.empty?
	    raise "ambiguous file, found multiple languages: #{matches.join(" ")}" if matches.size > 1

	    matches.first
	  end

	  # Prints and executes the ffmpeg command, then removes the temporary
	  # chapters file. An existing output file is not checked here (ffmpeg will ask).
	  #
	  # @return [Boolean, nil] the result of Kernel#system
	  def run
	    command = mux_command
	    warn "Running: #{command}"

	    system command
	  ensure
	    cleanup
	  end

	  # Deletes the temporary chapters metadata file, if one was written.
	  #
	  # @return [nil]
	  def cleanup
	    return unless @encoded_chapters

	    File.delete(@encoded_chapters) if File.exist?(@encoded_chapters)
	    @encoded_chapters = nil
	  end

	  private

	  # Reads the chapter lines of path, skipping blank ones, into
	  # { title:, start: } hashes with start in milliseconds.
	  def parse_chapters(path)
	    File.readlines(path).filter_map do |raw|
	      line = raw.strip
	      next if line.empty?

	      m = CHAPTER_LINE.match(line)
	      raise "bad chapters format at #{line}" unless m

	      seconds = (((m[:hrs].to_i * 60) + m[:mins].to_i) * 60) + m[:secs].to_i
	      { title: m[:title], start: seconds * 1000 }
	    end
	  end

	  # Writes chapters as FFMETADATA [CHAPTER] sections and returns the file path.
	  def write_ffmetadata(chapters)
	    # The final entry is never written as a chapter of its own: it only supplies
	    # the END of the one before it. Bumping it by 1 cancels the "- 1" below.
	    chapters.last[:start] += 1 unless chapters.empty?

	    dst = Tempfile.create(%w[CHAPTERS .txt])
	    begin
	      dst << ";FFMETADATA1\n\n"
	      chapters.each_cons(2) do |chapter, following|
	        dst << [
	          "[CHAPTER]",
	          "TIMEBASE=1/1000",
	          "START=#{chapter[:start]}",
	          "END=#{following[:start] - 1}",
	          "title=#{chapter[:title]}",
	          "",
	          "",
	        ].join("\n")
	      end
	    ensure
	      dst.close
	    end

	    dst.path
	  end
	end

	# Run the muxer only when this file is executed as a script (not when it is
	# required), and report a failed or unstartable ffmpeg through the exit status:
	# #run returns the result of Kernel#system, which is true only on success.
	if __FILE__ == $PROGRAM_NAME
	  exit(1) unless MuxerCLI.new(ARGV[0]).run
	end