// Copyright 2016 Google Inc. All rights reserved. | |
// Use of this source code is governed by the Apache 2.0 | |
// license that can be found in the LICENSE file. | |
// Command caption reads an audio file and outputs the transcript for it. | |
package main | |
import ( | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"golang.org/x/net/context" | |
"google.golang.org/api/option" | |
"google.golang.org/api/transport" | |
speech "google.golang.org/genproto/googleapis/cloud/speech/v1beta1" | |
) | |
func main() { | |
ctx := context.Background() | |
conn, err := transport.DialGRPC(ctx, | |
option.WithEndpoint("speech.googleapis.com:443"), | |
option.WithScopes("https://www.googleapis.com/auth/cloud-platform"), | |
) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer conn.Close() | |
stream, err := speech.NewSpeechClient(conn).StreamingRecognize(ctx) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// send the initial configuration message. | |
if err := stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_StreamingConfig{ | |
StreamingConfig: &speech.StreamingRecognitionConfig{ | |
Config: &speech.RecognitionConfig{ | |
Encoding: speech.RecognitionConfig_LINEAR16, | |
SampleRate: 16000, | |
}, | |
}, | |
}, | |
}); err != nil { | |
log.Fatal(err) | |
} | |
go func() { | |
// pipe stdin to the API | |
buf := make([]byte, 1024) | |
for { | |
n, err := os.Stdin.Read(buf) | |
if err == io.EOF { | |
return // nothing else to pipe, kill this goroutine | |
} | |
if err != nil { | |
log.Printf("reading stdin error: %v", err) | |
continue | |
} | |
if err = stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_AudioContent{ | |
AudioContent: buf[:n], | |
}, | |
}); err != nil { | |
log.Printf("sending audio error: %v", err) | |
} | |
} | |
}() | |
for { | |
resp, err := stream.Recv() | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
// TODO: handle error | |
continue | |
} | |
if resp.Error != nil { | |
// TODO: handle error | |
continue | |
} | |
for _, result := range resp.Results { | |
fmt.Printf("result: %+v\n", result) | |
} | |
} | |
} |
The sample uses the application default credentials; see more at https://developers.google.com/identity/protocols/application-default-credentials.
You need the gcloud command line tool to login.
$ gcloud auth login
If you have Speech API activated on the Cloud console, it should work.
why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?
v1beta1 is newer than v1, the API's team got the versioning a bit wrong :(
Oh I understand, thank you 👍 💯
This looks really cool! How do you recommend piping audio into stdin from a microphone? Or recorded audio? I've never done anything with audio before.
If you're using debian/ubuntu you can use this command:
rec -c 1 -r 8000 -t wav - | go run livecaption.go
You have to install the sox package first: apt install sox
note: I haven't tested it yet
Alternatively, GStreamer should also work on linux. gst-launch pulsesrc | go run livecaption.go.
Thanks, I'll give those a shot. I refactored this slightly to package it up, WDYT? https://gist.github.com/ImJasonH/5100f11db40671733a376b230418c484
(Also untested so far)
Can't make it work. First trouble was auth, which i figured from https://cloud.google.com/speech/docs/common/auth, then no more auth errors but no recognition either... tried different sample rates, no luck.
Hi. Just found this via google while trying to get the speech api working via grpc from go... so thanks it's a big help!
I have a .wav file, could you please tell me how I can pipe this in? I'm guessing I'd need to pipe in just the binary audio data and not the wav header, but am not sure how.
Thanks!
It's working for me with the following command:
gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | go run livecaption.go
Thanks @rakyll , this is a very useful example! 👏
How can I test this in a Mac environment?
I tried it on a Mac and got only this response:
bash-3.2$ rec -c 1 -r 8000 -t wav - | go run mylivecaption.go
rec WARN formats: can't set sample rate 8000; using 44100
rec WARN formats: can't set 1 channels; using 2
rec WARN wav: Length in output .wav header will be wrong since can't seek to fix it
Input File : 'default' (coreaudio)
Channels : 2
Sample Rate : 44100
Precision : 32-bit
Sample Encoding: 32-bit Signed Integer PCM
In:0.00% 00:01:02.00 [00:00:00.00] Out:495k [ | ] Clip:0 2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF
pls let me know how to fix it
The gst-launch-1.0 command is not working on Windows. Can you tell me the steps required to install it?
Thanks for this gist! Where should we put the OAuth 2.0 client ID or token?
why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?