# Basic usage of the CMU Sphinx voice recognition toolkit in JRuby.
require "java" | |
require "benchmark" | |
# To install and build CMU Sphinx, run the following from the directory
# in which this script will be run:
#
#   svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/sphinx4
#   cd sphinx4
#   ant
#
$CLASSPATH << "sphinx4/bld" | |
import "edu.cmu.sphinx.frontend.util.AudioFileDataSource" | |
import "edu.cmu.sphinx.recognizer.Recognizer" | |
import "edu.cmu.sphinx.result.Result" | |
import "edu.cmu.sphinx.util.props.ConfigurationManager" | |
import "javax.sound.sampled.UnsupportedAudioFileException" | |
import "java.io.IOException" | |
import "java.net.URL" | |
# Transcribes a single audio file with the CMU Sphinx 4 recognizer.
#
# Constructing an instance loads the Sphinx configuration, allocates the
# recognizer and attaches the audio file; #run then drains the recognizer
# and collects the recognized text.
class Transcriber
  # Classpath-relative location of the Sphinx configuration XML file
  CONFIG_RESOURCE = "sphinx4/bld/edu/cmu/sphinx/demo/transcriber/config.xml".freeze

  # Transcribe an audio file
  # @param [String] audio_file_name The name of an audio file to transcribe
  # @return [Array<String>] Transcribed text
  # @raise [IOError] if the configuration or the audio file cannot be found
  def self.run(audio_file_name)
    new(audio_file_name).run
  end

  # @param [String] audio_file_name The name of an audio file to transcribe
  # @raise [IOError] if the configuration or the audio file cannot be found
  def initialize(audio_file_name)
    @audio_file_name = audio_file_name
    configure
  end

  # Run the transcriber
  #
  # Results are accumulated in an instance variable, so calling this again
  # after the recognizer has been drained returns the same array.
  # @return [Array<String>] Transcribed text
  def run
    @results ||= []
    # The recognizer signals the end of the audio by returning nil.
    until (result = @recognizer.recognize).nil?
      text = result.get_best_result_no_filler
      # Skip segments for which nothing was recognized.
      next if text.nil? || text.empty?

      @results << text
    end
    @results
  end

  private

  # Configure the transcriber
  def configure
    load_configuration
    configure_recognizer
    load_audio
  end

  # Load the CMU Sphinx configuration XML file
  # @raise [IOError] if the configuration file is not on the classpath
  def load_configuration
    @class_loader = JRuby.runtime.get_jruby_class_loader
    # NOTE(review): presumably required so that Sphinx resolves the classes and
    # resources named in the configuration through JRuby's loader - confirm.
    Java::java.lang.Thread.current_thread.set_context_class_loader(@class_loader)
    @configuration = ConfigurationManager.new(fetch_resource(CONFIG_RESOURCE))
  end

  # Configure the CMU Sphinx recognizer object
  def configure_recognizer
    @recognizer = @configuration.lookup("recognizer")
    @recognizer.allocate
  end

  # Load the audio file
  # @raise [IOError] if the audio file is not on the classpath
  def load_audio
    audio_url = fetch_resource(@audio_file_name)
    data_source = @configuration.lookup("audioFileDataSource")
    data_source.set_audio_file(audio_url, nil)
  end

  # Look up a resource through the class loader, failing loudly when it is
  # missing instead of handing a nil URL to Sphinx.
  # @param [String] path Classpath-relative resource path
  # @return [Object] Whatever the class loader returns for the resource
  # @raise [IOError] if the class loader cannot find the resource
  def fetch_resource(path)
    @class_loader.get_resource(path) ||
      raise(IOError, "Resource not found on the classpath: #{path}")
  end
end
# Run: transcribe the sample audio file, then print the recognized text
# followed by the timing reported by Benchmark.measure.
output = [] # declared outside the block so the assignment inside is visible here
timing = Benchmark.measure { output = Transcriber.run("test.wav") }

puts output
puts timing
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment