Last active
March 15, 2018 15:15
-
-
Save Burgestrand/b2310cb4ca05455acdbbe8be8598bb8f to your computer and use it in GitHub Desktop.
Speech recognition in Ruby using Google Cloud Speech and Easy Audio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true | |
source "https://rubygems.org"

# Expand the `github:` shorthand used below into an HTTPS clone URL.
git_source(:github) do |repository|
  "https://github.com/#{repository}"
end

# Provides EasyAudio::Stream; fetched from GitHub via the `github:` option.
gem "easy_audio", github: "lsegal/easy_audio"

# Client library for the Google Cloud Speech API.
gem "google-cloud-speech"

# Required by the script; no direct use of it is visible there.
gem "pry"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GIT | |
remote: https://github.com/lsegal/easy_audio | |
revision: ae535c2efdae6fe0b5154e1ed371955ea0c09b58 | |
specs: | |
easy_audio (0.1.0) | |
ffi-portaudio (~> 0.0) | |
GEM | |
remote: https://rubygems.org/ | |
specs: | |
addressable (2.5.2) | |
public_suffix (>= 2.0.2, < 4.0) | |
coderay (1.1.2) | |
faraday (0.14.0) | |
multipart-post (>= 1.2, < 3) | |
ffi (1.9.23) | |
ffi-portaudio (0.1.2) | |
ffi | |
google-cloud-core (1.2.0) | |
google-cloud-env (~> 1.0) | |
google-cloud-env (1.0.1) | |
faraday (~> 0.11) | |
google-cloud-speech (0.29.0) | |
google-cloud-core (~> 1.2) | |
google-gax (~> 1.0) | |
google-gax (1.0.1) | |
google-protobuf (~> 3.2) | |
googleapis-common-protos (>= 1.3.5, < 2.0) | |
googleauth (~> 0.6.2) | |
grpc (>= 1.7.2, < 2.0) | |
rly (~> 0.2.3) | |
google-protobuf (3.5.1.2) | |
googleapis-common-protos (1.3.7) | |
google-protobuf (~> 3.0) | |
googleapis-common-protos-types (~> 1.0) | |
grpc (~> 1.0) | |
googleapis-common-protos-types (1.0.1) | |
google-protobuf (~> 3.0) | |
googleauth (0.6.2) | |
faraday (~> 0.12) | |
jwt (>= 1.4, < 3.0) | |
logging (~> 2.0) | |
memoist (~> 0.12) | |
multi_json (~> 1.11) | |
os (~> 0.9) | |
signet (~> 0.7) | |
grpc (1.10.0) | |
google-protobuf (~> 3.1) | |
googleapis-common-protos-types (~> 1.0.0) | |
googleauth (>= 0.5.1, < 0.7) | |
jwt (2.1.0) | |
little-plugger (1.1.4) | |
logging (2.2.2) | |
little-plugger (~> 1.1) | |
multi_json (~> 1.10) | |
memoist (0.16.0) | |
method_source (0.9.0) | |
multi_json (1.13.1) | |
multipart-post (2.0.0) | |
os (0.9.6) | |
pry (0.11.3) | |
coderay (~> 1.1.0) | |
method_source (~> 0.9.0) | |
public_suffix (3.0.2) | |
rly (0.2.3) | |
signet (0.8.1) | |
addressable (~> 2.3) | |
faraday (~> 0.9) | |
jwt (>= 1.5, < 3.0) | |
multi_json (~> 1.10) | |
PLATFORMS | |
ruby | |
DEPENDENCIES | |
easy_audio! | |
google-cloud-speech | |
pry | |
BUNDLED WITH | |
1.16.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "bundler/setup" | |
require "easy_audio" | |
require "google/cloud/speech" | |
require "pry" | |
require "stringio" | |
require "json" | |
# | |
# Audio Recorder | |
# | |
# Turns one floating-point audio sample into a signed 16-bit integer sample.
#
# The value is scaled by 32768 (half of 2**16), truncated toward zero, and
# then limited to the int16 range so out-of-range input saturates instead of
# wrapping.
#
# @param f [Numeric] sample value (presumably a Float in -1.0..1.0; verify
#   against what the audio stream delivers)
# @return [Integer] a value within -32768..32767
def convert(f)
  scaled = (f * 32768).to_i
  scaled.clamp(-32768, 32767)
end
# Capture settings; SAMPLE_RATE is also sent with the recognition request.
SAMPLE_RATE = 44_100
CHANNELS = 1
FRAME_SIZE = 256 # number of samples packed on every callback invocation

# Recording target shared with the main loop: nil while idle, a binary String
# while a recording is in progress. The main loop swaps the variable and the
# stream callback appends to whatever String it currently holds.
output_buffer = nil

# Open the input stream. The block is called with each captured buffer and
# returns :paContinue so that the stream keeps delivering audio.
stream = EasyAudio::Stream.new({
  sample_rate: SAMPLE_RATE,
  in_chans: CHANNELS,
  frame_size: FRAME_SIZE
}) do |buffer|
  # Read the shared variable exactly once. The main loop resets it to nil
  # while this callback may be running (presumably on another thread), so
  # testing and then re-reading the variable could hand nil to Array#pack.
  target = output_buffer
  if target
    int16_samples = buffer.samples.map { |sample| convert(sample) }
    # "s<" is a 16-bit signed little-endian integer; `buffer:` makes pack
    # append the bytes to the given String instead of allocating a new one.
    int16_samples.pack("s<#{FRAME_SIZE}", buffer: target)
  end
  :paContinue
end

stream.start
# | |
# Audio Analyzer | |
# | |
# Read the credentials file from the current working directory; its
# "project_id" entry doubles as the project the Speech client is bound to.
credentials = JSON.parse(File.read("./credentials.json"))
speech = Google::Cloud::Speech.new(
  project_id: credentials.fetch("project_id"),
  credentials: credentials
)

# Record in five-second windows forever, sending each window for recognition
# and printing the most confident transcript.
loop do
  puts "Starting! Speak freely for five seconds!"
  # Assigning a fresh binary String makes the stream callback start recording.
  output_buffer = "".b
  sleep 5
  puts "Analyzing the last 5 seconds…"
  # Take the recorded bytes and stop recording while the request is running.
  analyze = output_buffer
  output_buffer = nil
  audio = speech.audio StringIO.new(analyze),
                       encoding: :linear16,
                       language: "en-US",
                       sample_rate: SAMPLE_RATE
  # max_by ranks results by their confidence value. `max(&:confidence)` would
  # treat the block as a two-argument comparator and raise ArgumentError as
  # soon as there are two or more results. Yields nil when nothing was recognized.
  best = audio.recognize.max_by(&:confidence)
  if best
    puts "You said: #{best.transcript}"
  else
    puts "No idea what you said!"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment