// Copyright 2016 Google Inc. All rights reserved. | |
// Use of this source code is governed by the Apache 2.0 | |
// license that can be found in the LICENSE file. | |
// Command caption reads raw audio from stdin, streams it to the Cloud Speech API, and prints the transcription results. | |
package main | |
import ( | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"golang.org/x/net/context" | |
"google.golang.org/api/option" | |
"google.golang.org/api/transport" | |
speech "google.golang.org/genproto/googleapis/cloud/speech/v1beta1" | |
) | |
func main() { | |
ctx := context.Background() | |
conn, err := transport.DialGRPC(ctx, | |
option.WithEndpoint("speech.googleapis.com:443"), | |
option.WithScopes("https://www.googleapis.com/auth/cloud-platform"), | |
) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer conn.Close() | |
stream, err := speech.NewSpeechClient(conn).StreamingRecognize(ctx) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// send the initial configuration message. | |
if err := stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_StreamingConfig{ | |
StreamingConfig: &speech.StreamingRecognitionConfig{ | |
Config: &speech.RecognitionConfig{ | |
Encoding: speech.RecognitionConfig_LINEAR16, | |
SampleRate: 16000, | |
}, | |
}, | |
}, | |
}); err != nil { | |
log.Fatal(err) | |
} | |
go func() { | |
// pipe stdin to the API | |
buf := make([]byte, 1024) | |
for { | |
n, err := os.Stdin.Read(buf) | |
if err == io.EOF { | |
return // nothing else to pipe, kill this goroutine | |
} | |
if err != nil { | |
log.Printf("reading stdin error: %v", err) | |
continue | |
} | |
if err = stream.Send(&speech.StreamingRecognizeRequest{ | |
StreamingRequest: &speech.StreamingRecognizeRequest_AudioContent{ | |
AudioContent: buf[:n], | |
}, | |
}); err != nil { | |
log.Printf("sending audio error: %v", err) | |
} | |
} | |
}() | |
for { | |
resp, err := stream.Recv() | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
// TODO: handle error | |
continue | |
} | |
if resp.Error != nil { | |
// TODO: handle error | |
continue | |
} | |
for _, result := range resp.Results { | |
fmt.Printf("result: %+v\n", result) | |
} | |
} | |
} |
This comment has been minimized.
This comment has been minimized.
The sample uses the application default credentials; see more at https://developers.google.com/identity/protocols/application-default-credentials. You need the gcloud command-line tool to log in.
If you have Speech API activated on the Cloud console, it should work.
v1beta1 is newer than v1, the API's team got the versioning a bit wrong :( |
This comment has been minimized.
This comment has been minimized.
Oh I understand, thank you |
This comment has been minimized.
This comment has been minimized.
This looks really cool! How do you recommend piping audio into stdin from a microphone? Or recorded audio? I've never done anything with audio before. |
This comment has been minimized.
This comment has been minimized.
If you're using Debian/Ubuntu, you can use this command: `rec -c 1 -r 8000 -t wav - | go run livecaption.go`. You have to install the sox package. Note: I haven't tested it yet. |
This comment has been minimized.
This comment has been minimized.
Alternatively, GStreamer should also work on linux. gst-launch pulsesrc | go run livecaption.go. |
This comment has been minimized.
This comment has been minimized.
Thanks, I'll give those a shot. I refactored this slightly to package it up, WDYT? https://gist.github.com/ImJasonH/5100f11db40671733a376b230418c484 |
This comment has been minimized.
This comment has been minimized.
(Also untested so far) |
This comment has been minimized.
This comment has been minimized.
I can't make it work. The first problem was auth, which I figured out from https://cloud.google.com/speech/docs/common/auth; after that there were no more auth errors, but no recognition either. I tried different sample rates, with no luck. |
This comment has been minimized.
This comment has been minimized.
Hi. Just found this via google while trying to get the speech api working via grpc from go... so thanks it's a big help! I have a .wav file, could you please tell me how I can pipe this in? I'm guessing I'd need to pipe in just the binary audio data and not the wav header, but am not sure how. Thanks! |
This comment has been minimized.
This comment has been minimized.
It's working for me with the following command:
Thanks @rakyll , this is a very useful example! |
This comment has been minimized.
This comment has been minimized.
How can I test this in a Mac environment? I tried on a Mac and got only this response: Input File : 'default' (coreaudio) In:0.00% 00:01:02.00 [00:00:00.00] Out:495k [ | ] Clip:0 2016/12/23 13:11:53 sending audio error: EOF. Please let me know how to fix it. |
This comment has been minimized.
This comment has been minimized.
The gst-launch-1.0 command is not working on Windows. Can you tell me the steps required to install it? |
This comment has been minimized.
Thanks for this gist! Where should we put the OAuth 2.0 client ID or token?
why not "google.golang.org/genproto/googleapis/cloud/speech/v1" ?