Created
February 5, 2012 17:06
-
-
Save moski/1746601 to your computer and use it in GitHub Desktop.
An Engine to translate voice input files between languages. http://blog.moski.me/2012/02/building-siri-arabic-support-arabic.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # encoding: utf-8 | |
| # | |
| # @Details: blog post http://blog.moski.me/2012/02/building-siri-arabic-support-arabic.html | |
| # @Install: | |
| # Make sure to have FFMPEG installed using | |
| # brew install ffmpeg or port install ffmpeg | |
| # | |
| # Install the dependencies | |
| # gem install speech2text | |
| # @Example: | |
| # You can download a sample input audio file from here http://cl.ly/2d220o0I0n2q0H022K0A | |
| # | |
| # require './voice2voice.rb' | |
| # client = Voice2Voice.new('./time.wav', :ar, :en) | |
| # client.process! | |
| # | |
| require 'speech' | |
| require 'open-uri' | |
| require 'cgi' | |
| require 'json' | |
# Extend the speech2text gem's AudioToText class so the recognition language
# can be chosen by the caller (Arabic by default).
module Speech
  class AudioToText
    # Sends the audio file to the Google speech recognition endpoint, chunk by
    # chunk, and returns the parsed JSON that was captured.
    #
    # lang           - language code sent as the `lang` query parameter
    #                  (String or Symbol, default "ar").
    # max_results    - value for the `maxresults` query parameter (default 2).
    # chunk_duration - second argument handed to Speech::AudioSplitter
    #                  (default 300; presumably seconds per chunk, confirm
    #                  against the gem).
    #
    # Returns whatever JSON.parse yields for the contents of `captured_file`.
    # NOTE(review): `file`, `captured_file` and `convert_chunk` are not defined
    # here; they are expected to come from the gem's own AudioToText class.
    def to_text(lang = "ar", max_results = 2, chunk_duration = 300)
      # Escape the language code so a caller-supplied value cannot inject
      # extra query parameters.
      url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text" \
            "&lang=#{URI.encode_www_form_component(lang)}&maxresults=#{Integer(max_results)}"

      # Split based off the wave file because flac doesn't tell us the duration.
      splitter = Speech::AudioSplitter.new(file, chunk_duration)
      easy = Curl::Easy.new(url)

      splitter.split.each do |chunk|
        chunk.build.to_flac
        convert_chunk(easy, chunk)
      end

      JSON.parse(File.read(captured_file))
    end
  end
end
# A module to translate text using the Google Translate page.
module Google
  class Translator
    # The URL for the Google Translate page.
    GOOGLE_TRANSLATE_SERVICE_URL = "http://translate.google.com"

    # Browser-like User-Agent sent with every request.
    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'

    # Matches either a complete JSON string literal (left untouched) or an
    # empty slot between separators (`[,`  `,,`  `,]`) that must be filled.
    EMPTY_SLOT = /"(?:\\.|[^"\\])*"|(?<=\[)(?=,)|(?<=,)(?=[,\]])/

    # Defines one error class per given name as a constant on the receiver.
    # The classes descend from StandardError so that a plain `rescue` (and the
    # `rescue` modifier) can catch them.
    def self.Exception(*names)
      names.each { |name| const_set(name, Class.new(StandardError)) }
    end

    # Basic exceptions.
    Exception :MissingFromLanguage, :MissingToLanguage, :MissingTextLanguage, :TranslateServerIsDown

    # Translates +from_text+ through the Google Translate web endpoint.
    #
    # from      - source language code (String or Symbol).
    # to        - target language code (String or Symbol).
    # from_text - text to translate.
    # options   - accepted for interface compatibility; currently unused.
    #
    # Returns a two-element Array: elements [0][0][0] and [0][0][2] of the
    # parsed response (presumably the translation and a transliteration;
    # confirm against a live response).
    #
    # Raises MissingFromLanguage, MissingToLanguage or MissingTextLanguage when
    # the corresponding argument is nil, and TranslateServerIsDown when the
    # request, the parsing, or the extraction of the result fails.
    def translate(from, to, from_text, options = {})
      raise MissingFromLanguage if from.nil?
      raise MissingToLanguage if to.nil?
      raise MissingTextLanguage if from_text.nil?

      begin
        url = translate_url(from, to, from_text)
        puts "Translate URL: #{url}"

        URI.parse(url).open("User-Agent" => USER_AGENT) do |stream|
          # Read the stream and convert it into valid JSON before parsing.
          result = JSON.parse(normalize_json(stream.read))
          raise TranslateServerIsDown if !result || result.empty?

          [result[0][0][0], result[0][0][2]]
        end
      rescue StandardError => e
        # Any network, parsing or shape error is reported uniformly; signals
        # such as Interrupt are deliberately not intercepted.
        raise TranslateServerIsDown, e.message
      end
    end

    private

    # Builds the request URL with every parameter form-encoded, so text
    # containing `&`, `=`, `#` or non-ASCII characters cannot corrupt the query.
    def translate_url(from, to, from_text)
      query = URI.encode_www_form(
        [
          ["client", "t"], ["text", from_text], ["hl", from], ["sl", "auto"],
          ["tl", to], ["multires", 1], ["prev", "btn"], ["ssel", 0],
          ["tsel", 4], ["uptl", to], ["alttl", from], ["sc", 1]
        ]
      )
      "#{GOOGLE_TRANSLATE_SERVICE_URL}/translate_a/t?#{query}"
    end

    # The service omits empty array elements (e.g. `[1,,2]`), which is not
    # valid JSON. Fills each empty slot with an empty string while leaving the
    # contents of string literals untouched.
    def normalize_json(content)
      content.gsub(EMPTY_SLOT) { |match| match.empty? ? '""' : match }
    end
  end
end
# Pipeline that turns a spoken audio file in one language into a spoken MP3 in
# another: speech recognition, then text translation, then text-to-speech.
class Voice2Voice
  # Browser-like User-Agent sent with the text-to-speech request.
  USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'

  # Characters used for generated file names: a-z and 1-9.
  FILENAME_CHARS = (("a".."z").to_a + ("1".."9").to_a).freeze

  attr_accessor :file, :input_text, :output_text
  attr_accessor :from_lang, :to_lang

  # file - path of the audio file to process.
  # from - language of the audio file (default :ar).
  # to   - desired output language (default :en).
  def initialize(file, from = :ar, to = :en)
    self.file = file
    self.from_lang = from
    self.to_lang = to
  end

  # Runs the whole pipeline.
  #
  # Returns the path of the generated MP3, or nil when no translated text was
  # produced.
  def process!
    # Convert the audio file to text. Effect: input_text is updated.
    voice2Text
    # Translate input_text into to_lang. Effect: output_text is updated.
    tranlsateText
    # Generate an MP3 of the translated text.
    text2Voice
  end

  # Recognizes the speech in +file+ and stores the result in input_text.
  # Recognition is best-effort: on failure the error is reported on stderr and
  # input_text becomes nil.
  #
  # NOTE(review): the result is taken as ["hypotheses"].first.first of the
  # parsed response; confirm this matches the shape the gem captures.
  #
  # Returns input_text.
  def voice2Text
    audio = Speech::AudioToText.new(file)
    self.input_text =
      begin
        audio.to_text(from_lang)["hypotheses"].first.first
      rescue StandardError => e
        warn "voice2Text failed: #{e.class}: #{e.message}"
        nil
      end
    puts "Arabic: #{input_text}"
    input_text
  end

  # Translates input_text from from_lang to to_lang and stores the result in
  # output_text. Best-effort: on failure the error is reported on stderr and
  # output_text becomes nil.
  #
  # Returns output_text, or nil when there is no input_text.
  def tranlsateText
    return nil if input_text.nil?

    translator = Google::Translator.new
    self.output_text =
      begin
        translator.translate(from_lang, to_lang, input_text).first
      # TranslateServerIsDown is listed explicitly in case it does not descend
      # from StandardError.
      rescue StandardError, Google::Translator::TranslateServerIsDown => e
        warn "tranlsateText failed: #{e.class}: #{e.message}"
        nil
      end
    puts "English: #{output_text}"
    output_text
  end

  # Correctly spelled alias; the original (misspelled) name is kept for
  # existing callers.
  alias_method :translateText, :tranlsateText

  # Fetches spoken audio for output_text in to_lang and writes it to a randomly
  # named MP3 file in the current directory.
  #
  # Returns the path of the MP3, or nil when output_text is nil or empty.
  def text2Voice
    return nil if output_text.nil? || output_text.to_s.empty?

    # Block form so the response stream is closed as soon as it has been read.
    audio = URI.parse(tts_url).open('User-Agent' => USER_AGENT) { |stream| stream.read }
    mp3 = "./#{rand_string}.mp3"
    # Binary mode: the payload is MP3 data, not text.
    File.open(mp3, 'wb') { |f| f.write(audio) }
    mp3
  end

  private

  # Builds the text-to-speech URL with form-encoded parameters.
  def tts_url
    query = URI.encode_www_form([["tl", to_lang], ["q", output_text]])
    "http://translate.google.com/translate_tts?#{query}"
  end

  # Generate a random filename of +size+ characters.
  def rand_string(size = 20)
    Array.new(size) { FILENAME_CHARS.sample }.join
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment