Skip to content

Instantly share code, notes, and snippets.

@tily
Created January 6, 2010 13:23
Show Gist options
  • Save tily/270269 to your computer and use it in GitHub Desktop.
Save tily/270269 to your computer and use it in GitHub Desktop.
extract solamimi from mp3 file
require 'shellwords'

require 'rubygems'
require 'scissor'
require 'pit'
# = ABSTRACT
# extract solamimi text from mp3 file using scissor, ffmpeg and julius
# = USAGE
# ruby solamimi.rb /path/to/target.mp3 start_time length
# = EXAMPLE
# ruby solamimi.rb ~/Music/iTunes/iTunes\ Music/The\ Beatles/1962-1966\ \[Disc\ 1\]/1-02\ Please\ Please\ Me.mp3 1:02.22 2.3
# (the julius jconf path is not a command-line argument; it is read from Pit under 'julius' => 'conf_path')
# Extracts "solamimi" text from a slice of an mp3 file: the slice is cut out
# with scissor, converted to wav with ffmpeg, and transcribed with julius.
#
# Recognised options (all optional, symbol keys):
#   :start      - offset of the slice handed to Scissor (default 0)
#   :length     - length of the slice handed to Scissor (default 5)
#   :jconf_path - path of the julius configuration file passed via -C
class Solamimi
  # Line of julius' output that carries the recognised sentence.
  SENTENCE_PATTERN = /\nsentence1: (.+?)\n/

  # Convenience wrapper: builds an instance and runs #process on it.
  #
  # @param mp3_path [String] path to the source mp3 file
  # @param options [Hash, nil] see the class comment
  # @return [Solamimi] the processed instance
  # @raise [ArgumentError] if mp3_path does not end in ".mp3"
  def self.listen_to(mp3_path, options=nil)
    new(mp3_path, options).process
  end

  # Text recognised by the last #process call (nil before processing or when
  # julius reported no sentence).
  attr_reader :japanese

  # @param mp3_path [String] path to the source mp3 file
  # @param options [Hash, nil] see the class comment
  # @raise [ArgumentError] if mp3_path does not end in ".mp3"
  def initialize(mp3_path, options=nil)
    # \z anchors at the very end of the string; `$` would also match right
    # before an embedded newline and let "x.mp3\nfoo" through.
    raise ArgumentError, 'arg must be mp3 file' unless mp3_path =~ /\.mp3\z/
    @mp3_path = mp3_path
    @options = options || {}
  end

  # Cuts the requested slice, converts it to wav and stores the transcription
  # in #japanese. Intermediate files live in a temporary directory.
  #
  # @return [Solamimi] self, so calls can be chained
  def process
    # NOTE(review): TempDir is not required by this file; presumably it is
    # loaded as a side effect of requiring scissor - confirm.
    TempDir.create do |dir|
      clip_path = "#{dir}/solamimi.mp3"
      wav_path = "#{dir}/solamimi.wav"
      Scissor(@mp3_path)[start, length] > clip_path
      mp3_to_wav(clip_path, wav_path)
      @japanese = text_from_wav(wav_path)
    end
    self
  end

  # Converts source to a mono, 16 kHz wav file at target using ffmpeg.
  # ffmpeg's stderr output is discarded.
  #
  # @param source [String] path of the mp3 file to read
  # @param target [String] path of the wav file to write (overwritten)
  # @return [Boolean, nil] the result of Kernel#system
  def mp3_to_wav(source, target)
    # Paths are shell-escaped so spaces and metacharacters survive intact.
    system "ffmpeg -y -i #{Shellwords.escape(source)} -ac 1 -ar 16000 " \
           "#{Shellwords.escape(target)} 2>/dev/null"
  end

  # Feeds the wav file name to julius on stdin and returns the recognised
  # sentence with all whitespace removed. julius' stderr output is discarded.
  #
  # @param source [String] path of the wav file to transcribe
  # @return [String, nil] the sentence, or nil when julius printed none
  def text_from_wav(source)
    output = `echo #{Shellwords.escape(source)} | julius #{julius_options} 2>/dev/null`
    extract_sentence(output)
  end

  # @return [String] command-line options passed to julius
  def julius_options
    "-charconv EUC-JP UTF-8 -C #{Shellwords.escape(@options[:jconf_path].to_s)}"
  end

  # @return [Numeric] slice offset, defaulting to 0
  def start
    @start ||= @options[:start] || 0
  end

  # @return [Numeric] slice length, defaulting to 5
  def length
    @length ||= @options[:length] || 5
  end

  private

  # Pulls the "sentence1:" line out of julius' output and strips whitespace.
  # Returns nil instead of raising NoMethodError when the line is absent
  # (e.g. julius failed or recognised nothing).
  def extract_sentence(output)
    found = SENTENCE_PATTERN.match(output)
    found && found[1].gsub(/\s/, '')
  end
end
# Command-line entry point:
#   ruby solamimi.rb /path/to/target.mp3 start_time length
# start_time is either seconds ("62.22") or minutes:seconds ("1:02.22").
# When start_time or length is omitted, 0 and 5 are used respectively.
mp3_path, start, length = ARGV
abort "usage: ruby #{$0} /path/to/target.mp3 [start_time] [length]" unless mp3_path

# An omitted start_time used to crash on nil.split; treat it as offset 0.
start ||= '0'
start_info = start.split(':')
sec_start =
  case start_info.size
  when 2
    start_info[0].to_f * 60 + start_info[1].to_f
  when 1
    start.to_f
  else
    # abort exits with status 1, like the original bare `exit 1`, but says why.
    abort "invalid start_time: #{start.inspect} (expected SS or MM:SS)"
  end

# nil.to_f is 0.0, which is truthy, so `length.to_f || 5` never fell back to
# the default; test for the missing argument explicitly instead.
length = length ? length.to_f : 5.0

# Looked up only after the arguments are validated. The julius configuration
# path is stored in Pit under 'julius' => 'conf_path'.
jconf_path = Pit.get('julius', :require => {'conf_path' => 'your julius conf file path'})['conf_path']

s = Solamimi.listen_to(mp3_path, {
  :start => sec_start,
  :length => length,
  :jconf_path => jconf_path
})
puts "「#{s.japanese}」(#{start} - #{length})"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment