Instantly share code, notes, and snippets.

Embed
What would you like to do?
// Copyright 2016 Google Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
// Command caption reads audio from stdin and outputs the transcript for it.
package main
import (
"fmt"
"io"
"log"
"os"
"golang.org/x/net/context"
"google.golang.org/api/option"
"google.golang.org/api/transport"
speech "google.golang.org/genproto/googleapis/cloud/speech/v1beta1"
)
// main streams audio from stdin to the Cloud Speech StreamingRecognize API
// and prints every recognition result received until the server closes the
// stream. The request is configured for LINEAR16 audio at a 16000 sample rate.
func main() {
	ctx := context.Background()
	conn, err := transport.DialGRPC(ctx,
		option.WithEndpoint("speech.googleapis.com:443"),
		option.WithScopes("https://www.googleapis.com/auth/cloud-platform"),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	stream, err := speech.NewSpeechClient(conn).StreamingRecognize(ctx)
	if err != nil {
		log.Fatal(err)
	}

	// Send the initial configuration message; it must precede any audio.
	if err := stream.Send(&speech.StreamingRecognizeRequest{
		StreamingRequest: &speech.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speech.StreamingRecognitionConfig{
				Config: &speech.RecognitionConfig{
					Encoding:   speech.RecognitionConfig_LINEAR16,
					SampleRate: 16000,
				},
			},
		},
	}); err != nil {
		log.Fatal(err)
	}

	go func() {
		// Pipe stdin to the API. This goroutine is the only sender once the
		// configuration message has gone out, so Send and CloseSend never
		// run concurrently with each other.
		buf := make([]byte, 1024)
		for {
			n, err := os.Stdin.Read(buf)
			// A reader may return data together with an error, so forward
			// whatever was read before looking at err.
			if n > 0 {
				if sendErr := stream.Send(&speech.StreamingRecognizeRequest{
					StreamingRequest: &speech.StreamingRecognizeRequest_AudioContent{
						AudioContent: buf[:n],
					},
				}); sendErr != nil {
					// The stream is broken; further sends would fail the
					// same way. The receive loop reports the real cause.
					log.Printf("sending audio error: %v", sendErr)
					return
				}
			}
			if err == nil {
				continue
			}
			if err != io.EOF {
				log.Printf("reading stdin error: %v", err)
			}
			// No more audio will be sent (EOF or unrecoverable read error):
			// half-close the stream so the server can finish and the
			// receive loop can observe io.EOF.
			if closeErr := stream.CloseSend(); closeErr != nil {
				log.Printf("closing send stream error: %v", closeErr)
			}
			return
		}
	}()

	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			// A failed stream keeps returning the same error, so retrying
			// would spin forever; report it and stop.
			log.Fatalf("receiving results error: %v", err)
		}
		if resp.Error != nil {
			log.Printf("recognition error: %v", resp.Error)
			continue
		}
		for _, result := range resp.Results {
			fmt.Printf("result: %+v\n", result)
		}
	}
}
@yanpozka

This comment has been minimized.

Show comment
Hide comment
@yanpozka

yanpozka Aug 30, 2016

thanks for this gist! where we should put the OAuth 2.0 client ID or token ?

why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?

yanpozka commented Aug 30, 2016

thanks for this gist! where we should put the OAuth 2.0 client ID or token ?

why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?

@rakyll

This comment has been minimized.

Show comment
Hide comment
@rakyll

rakyll Aug 30, 2016

The sample uses the application default credentials, see more at https://developers.google.com/identity/protocols/application-default-credentials.

You need the gcloud command line tool to login.

$ gcloud auth login

If you have Speech API activated on the Cloud console, it should work.

why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?

v1beta1 is newer than v1, the API's team got the versioning a bit wrong :(

Owner

rakyll commented Aug 30, 2016

The sample uses the application default credentials, see more at https://developers.google.com/identity/protocols/application-default-credentials.

You need the gcloud command line tool to login.

$ gcloud auth login

If you have Speech API activated on the Cloud console, it should work.

why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?

v1beta1 is newer than v1, the API's team got the versioning a bit wrong :(

@yanpozka

This comment has been minimized.

Show comment
Hide comment
@yanpozka

yanpozka Aug 30, 2016

Oh I understand, thank you 👍 💯

yanpozka commented Aug 30, 2016

Oh I understand, thank you 👍 💯

@ImJasonH

This comment has been minimized.

Show comment
Hide comment
@ImJasonH

ImJasonH Aug 31, 2016

This looks really cool! How do you recommend piping audio into stdin from a microphone? Or recorded audio? I've never done anything with audio before.

ImJasonH commented Aug 31, 2016

This looks really cool! How do you recommend piping audio into stdin from a microphone? Or recorded audio? I've never done anything with audio before.

@yanpozka

This comment has been minimized.

Show comment
Hide comment
@yanpozka

yanpozka Aug 31, 2016

If you're using debian/ubuntu you can use this command:

rec -c 1 -r 8000 -t wav - | go run livecaption.go

You have to install the sox package first: apt install sox

note: I haven't tested it yet

yanpozka commented Aug 31, 2016

If you're using debian/ubuntu you can use this command:

rec -c 1 -r 8000 -t wav - | go run livecaption.go

You have to install the sox package first: apt install sox

note: I haven't tested it yet

@rakyll

This comment has been minimized.

Show comment
Hide comment
@rakyll

rakyll Aug 31, 2016

Alternatively, GStreamer should also work on linux. gst-launch pulsesrc | go run livecaption.go.

Owner

rakyll commented Aug 31, 2016

Alternatively, GStreamer should also work on linux. gst-launch pulsesrc | go run livecaption.go.

@ImJasonH

This comment has been minimized.

Show comment
Hide comment
@ImJasonH

ImJasonH Sep 1, 2016

Thanks, I'll give those a shot. I refactored this slightly to package it up, WDYT? https://gist.github.com/ImJasonH/5100f11db40671733a376b230418c484

ImJasonH commented Sep 1, 2016

Thanks, I'll give those a shot. I refactored this slightly to package it up, WDYT? https://gist.github.com/ImJasonH/5100f11db40671733a376b230418c484

@ImJasonH

This comment has been minimized.

Show comment
Hide comment
@ImJasonH

ImJasonH Sep 1, 2016

(Also untested so far)

ImJasonH commented Sep 1, 2016

(Also untested so far)

@fiorix

This comment has been minimized.

Show comment
Hide comment
@fiorix

fiorix Sep 2, 2016

Can't make it work. First trouble was auth, which i figured from https://cloud.google.com/speech/docs/common/auth, then no more auth errors but no recognition either... tried different sample rates, no luck.

fiorix commented Sep 2, 2016

Can't make it work. First trouble was auth, which i figured from https://cloud.google.com/speech/docs/common/auth, then no more auth errors but no recognition either... tried different sample rates, no luck.

@mikeleonard

This comment has been minimized.

Show comment
Hide comment
@mikeleonard

mikeleonard Sep 9, 2016

Hi. Just found this via google while trying to get the speech api working via grpc from go... so thanks it's a big help!

I have a .wav file, could you please tell me how I can pipe this in? I'm guessing I'd need to pipe in just the binary audio data and not the wav header, but am not sure how.

Thanks!

mikeleonard commented Sep 9, 2016

Hi. Just found this via google while trying to get the speech api working via grpc from go... so thanks it's a big help!

I have a .wav file, could you please tell me how I can pipe this in? I'm guessing I'd need to pipe in just the binary audio data and not the wav header, but am not sure how.

Thanks!

@antonmry

This comment has been minimized.

Show comment
Hide comment
@antonmry

antonmry Sep 24, 2016

It's working for me with the following command:

gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | go run livecaption.go

Thanks @rakyll , this is a very useful example! 👏

antonmry commented Sep 24, 2016

It's working for me with the following command:

gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | go run livecaption.go

Thanks @rakyll , this is a very useful example! 👏

@ssenthil416

This comment has been minimized.

Show comment
Hide comment
@ssenthil416

ssenthil416 Dec 23, 2016

How I can test this in mac environment?

I tried in mac and got this responce only
bash-3.2$ rec -c 1 -r 8000 -t wav - | go run mylivecaption.go
rec WARN formats: can't set sample rate 8000; using 44100
rec WARN formats: can't set 1 channels; using 2
rec WARN wav: Length in output .wav header will be wrong since can't seek to fix it

Input File : 'default' (coreaudio)
Channels : 2
Sample Rate : 44100
Precision : 32-bit
Sample Encoding: 32-bit Signed Integer PCM

In:0.00% 00:01:02.00 [00:00:00.00] Out:495k [ | ] Clip:0 2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF

pls let me know how to fix it

ssenthil416 commented Dec 23, 2016

How I can test this in mac environment?

I tried in mac and got this responce only
bash-3.2$ rec -c 1 -r 8000 -t wav - | go run mylivecaption.go
rec WARN formats: can't set sample rate 8000; using 44100
rec WARN formats: can't set 1 channels; using 2
rec WARN wav: Length in output .wav header will be wrong since can't seek to fix it

Input File : 'default' (coreaudio)
Channels : 2
Sample Rate : 44100
Precision : 32-bit
Sample Encoding: 32-bit Signed Integer PCM

In:0.00% 00:01:02.00 [00:00:00.00] Out:495k [ | ] Clip:0 2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF
2016/12/23 13:11:53 sending audio error: EOF

pls let me know how to fix it

@harshit28

This comment has been minimized.

Show comment
Hide comment
@harshit28

harshit28 Nov 22, 2017

gst-launch-1.0 command is not working in windows can u tell me the required steps for the installation of it.

harshit28 commented Nov 22, 2017

gst-launch-1.0 command is not working in windows can u tell me the required steps for the installation of it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment