Skip to content

Instantly share code, notes, and snippets.

@Burgestrand
Last active March 15, 2018 15:15
Show Gist options
  • Save Burgestrand/b2310cb4ca05455acdbbe8be8598bb8f to your computer and use it in GitHub Desktop.
Save Burgestrand/b2310cb4ca05455acdbbe8be8598bb8f to your computer and use it in GitHub Desktop.
Speech recognition in Ruby using Google Cloud Speech and Easy Audio
# frozen_string_literal: true

# Gem dependencies for the speech recognition script.
source "https://rubygems.org"

# Expands the `github: "owner/repo"` shorthand into an HTTPS clone URL.
git_source(:github) do |repo|
  "https://github.com/#{repo}"
end

# Pulled straight from GitHub via the shorthand defined above.
gem "easy_audio", github: "lsegal/easy_audio"
gem "google-cloud-speech"
gem "pry"
GIT
remote: https://github.com/lsegal/easy_audio
revision: ae535c2efdae6fe0b5154e1ed371955ea0c09b58
specs:
easy_audio (0.1.0)
ffi-portaudio (~> 0.0)
GEM
remote: https://rubygems.org/
specs:
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
coderay (1.1.2)
faraday (0.14.0)
multipart-post (>= 1.2, < 3)
ffi (1.9.23)
ffi-portaudio (0.1.2)
ffi
google-cloud-core (1.2.0)
google-cloud-env (~> 1.0)
google-cloud-env (1.0.1)
faraday (~> 0.11)
google-cloud-speech (0.29.0)
google-cloud-core (~> 1.2)
google-gax (~> 1.0)
google-gax (1.0.1)
google-protobuf (~> 3.2)
googleapis-common-protos (>= 1.3.5, < 2.0)
googleauth (~> 0.6.2)
grpc (>= 1.7.2, < 2.0)
rly (~> 0.2.3)
google-protobuf (3.5.1.2)
googleapis-common-protos (1.3.7)
google-protobuf (~> 3.0)
googleapis-common-protos-types (~> 1.0)
grpc (~> 1.0)
googleapis-common-protos-types (1.0.1)
google-protobuf (~> 3.0)
googleauth (0.6.2)
faraday (~> 0.12)
jwt (>= 1.4, < 3.0)
logging (~> 2.0)
memoist (~> 0.12)
multi_json (~> 1.11)
os (~> 0.9)
signet (~> 0.7)
grpc (1.10.0)
google-protobuf (~> 3.1)
googleapis-common-protos-types (~> 1.0.0)
googleauth (>= 0.5.1, < 0.7)
jwt (2.1.0)
little-plugger (1.1.4)
logging (2.2.2)
little-plugger (~> 1.1)
multi_json (~> 1.10)
memoist (0.16.0)
method_source (0.9.0)
multi_json (1.13.1)
multipart-post (2.0.0)
os (0.9.6)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
public_suffix (3.0.2)
rly (0.2.3)
signet (0.8.1)
addressable (~> 2.3)
faraday (~> 0.9)
jwt (>= 1.5, < 3.0)
multi_json (~> 1.10)
PLATFORMS
ruby
DEPENDENCIES
easy_audio!
google-cloud-speech
pry
BUNDLED WITH
1.16.1
require "bundler/setup"
require "easy_audio"
require "google/cloud/speech"
require "pry"
require "stringio"
require "json"
#
# Audio Recorder
#
# Converts a floating-point audio sample into a signed 16-bit integer sample.
#
# The sample is scaled by 32768 (half of 2 ** 16), truncated toward zero, and
# then limited to the int16 range, so out-of-range input saturates instead of
# wrapping around.
#
# @param f [Numeric] the sample to convert
# @return [Integer] a value within -32768..32767
def convert(f)
  scaled = (f * 32768).to_i
  scaled.clamp(-32768, 32767)
end
# Capture settings handed to EasyAudio::Stream below.
SAMPLE_RATE = 44_100
CHANNELS = 1
FRAME_SIZE = 256

# Recording target shared between the audio callback and the rest of the
# script: a binary String while audio should be captured, nil while incoming
# audio should be dropped.
output_buffer = nil

stream = EasyAudio::Stream.new({
  sample_rate: SAMPLE_RATE,
  in_chans: CHANNELS,
  frame_size: FRAME_SIZE
}) do |buffer|
  # Read the shared variable exactly once. The rest of the script reassigns it
  # (including back to nil) while the stream is running, so checking it and
  # then reading it again could hand nil to `pack` if it was swapped in between.
  recording = output_buffer

  if recording
    int16_samples = buffer.samples.map { |sample| convert(sample) }
    # "s<" is a little-endian signed 16-bit integer; `buffer:` makes pack
    # append to the existing String instead of allocating a new one.
    int16_samples.pack("s<#{FRAME_SIZE}", buffer: recording)
  end

  # NOTE(review): presumably tells the audio backend to keep the stream
  # running — confirm against the EasyAudio / PortAudio documentation.
  :paContinue
end

stream.start
#
# Audio Analyzer
#
# Service-account credentials are read from the current working directory; the
# parsed hash supplies the project id and is also passed on as the credentials.
credentials = JSON.parse(File.read("./credentials.json"))
speech = Google::Cloud::Speech.new(
  project_id: credentials.fetch("project_id"),
  credentials: credentials
)

# Repeats forever: record for five seconds, send the captured audio off for
# recognition, and print the most confident transcript.
loop do
  puts "Starting! Speak freely for five seconds!"
  # Assigning a fresh binary String switches the audio callback into
  # recording mode; it appends int16 samples to this String.
  output_buffer = "".b
  sleep 5
  puts "Analyzing the last 5 seconds…"

  # Take ownership of the recording, then reset the shared variable to nil so
  # the callback stops appending while the audio is being analyzed.
  analyze = output_buffer
  output_buffer = nil

  audio = speech.audio StringIO.new(analyze),
                       encoding: :linear16,
                       language: "en-US",
                       sample_rate: SAMPLE_RATE

  # `max_by` ranks results by their confidence value. `max(&:confidence)`
  # would use the block as a two-argument comparator and raise ArgumentError
  # as soon as more than one result came back.
  result = audio.recognize.max_by(&:confidence)

  if result
    puts "You said: #{result.transcript}"
  else
    puts "No idea what you said!"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment