Skip to content

Instantly share code, notes, and snippets.

@imjasonh
Last active October 28, 2021 06:12
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save imjasonh/5100f11db40671733a376b230418c484 to your computer and use it in GitHub Desktop.
Save imjasonh/5100f11db40671733a376b230418c484 to your computer and use it in GitHub Desktop.
// Copyright 2016 Google Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
// Command caption reads audio data from standard input and logs the transcript for it.
package main
import (
"io"
"log"
"os"
"golang.org/x/net/context"
"google.golang.org/api/option"
"google.golang.org/api/transport"
speech "google.golang.org/genproto/googleapis/cloud/speech/v1beta1"
)
func main() {
c, err := NewCaption()
if err != nil {
log.Fatalf("NewCaption: %v", err)
}
// Pipe stdin to the captioner.
go io.Copy(c, os.Stdin)
// Read results from the captioner.
for t, ok := range c.Chan() {
if !ok {
log.Println("all done")
break
}
log.Println(t)
}
}
// Captioner is the audio-in, text-out contract of this command: audio bytes
// are supplied through Write, Close ends the audio input, and transcribed
// text is received from the channel returned by Chan.
type Captioner interface {
	io.Writer
	io.Closer

	// Chan returns the receive-only channel on which transcripts arrive.
	Chan() <-chan string
}
// captioner is the Captioner implementation returned by NewCaptioner.
//
// The embedded *io.PipeWriter supplies Write and Close: written audio is
// handed to the goroutine that forwards it to the speech stream, and Close
// marks the end of the audio.
type captioner struct {
	*io.PipeWriter
	ch chan string
}

// Chan returns the channel on which transcripts are delivered. The channel is
// closed once the speech stream has ended, successfully or not.
func (c *captioner) Chan() <-chan string {
	return c.ch
}

// NewCaptioner returns a new Captioner ready to receive audio data.
//
// It dials speech.googleapis.com, opens a StreamingRecognize stream and sends
// the initial configuration (LINEAR16 at 16000 Hz). Two goroutines are then
// started:
//
//   - a sender, which forwards everything written to the Captioner as audio
//     content and half-closes the stream once the Captioner is closed;
//   - a receiver, which publishes the first alternative of every result on
//     Chan, then closes the channel and the connection when the stream ends.
//
// An error is returned if dialing, opening the stream, or sending the
// configuration fails. Callers must keep draining Chan until it is closed;
// the receiver blocks on the unbuffered channel otherwise.
func NewCaptioner() (Captioner, error) {
	ctx := context.Background()
	conn, err := transport.DialGRPC(ctx,
		option.WithEndpoint("speech.googleapis.com:443"),
		option.WithScopes("https://www.googleapis.com/auth/cloud-platform"),
	)
	if err != nil {
		return nil, err
	}
	// The connection must outlive this function because the goroutines below
	// keep using the stream. It is closed on the early-error paths here and
	// otherwise by the receiver goroutine when the stream ends.

	stream, err := speech.NewSpeechClient(conn).StreamingRecognize(ctx)
	if err != nil {
		conn.Close()
		return nil, err
	}

	// Send the initial configuration message.
	if err := stream.Send(&speech.StreamingRecognizeRequest{
		StreamingRequest: &speech.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speech.StreamingRecognitionConfig{
				Config: &speech.RecognitionConfig{
					Encoding:   speech.RecognitionConfig_LINEAR16,
					SampleRate: 16000,
				},
			},
		},
	}); err != nil {
		conn.Close()
		return nil, err
	}

	// io.Pipe returns the reader first, then the writer.
	pr, pw := io.Pipe()
	c := &captioner{PipeWriter: pw, ch: make(chan string)}

	// Sender: forward written audio to the speech stream.
	go func() {
		buf := make([]byte, 32*1024)
		for {
			n, err := pr.Read(buf)
			if n > 0 {
				// Copy the chunk: buf is reused by the next Read, and a
				// message must not be modified after it is handed to Send.
				chunk := append([]byte(nil), buf[:n]...)
				if serr := stream.Send(&speech.StreamingRecognizeRequest{
					StreamingRequest: &speech.StreamingRecognizeRequest_AudioContent{
						AudioContent: chunk,
					},
				}); serr != nil {
					log.Printf("sending audio error: %v", serr)
					// Fail subsequent writes instead of letting the writer
					// block on a pipe nobody reads any more.
					pr.CloseWithError(serr)
					return
				}
			}
			if err != nil {
				if err != io.EOF {
					log.Printf("reading buf error: %v", err)
				}
				// No more audio: half-close so the server can finish and the
				// receiver eventually sees io.EOF.
				if cerr := stream.CloseSend(); cerr != nil {
					log.Printf("closing send stream error: %v", cerr)
				}
				return
			}
		}
	}()

	// Receiver: publish transcripts until the stream ends.
	go func() {
		defer conn.Close()
		defer close(c.ch)
		for {
			resp, err := stream.Recv()
			if err == io.EOF {
				return // no more results
			}
			if err != nil {
				// A stream error is terminal; retrying Recv would spin forever.
				log.Printf("receiving results error: %v", err)
				pr.CloseWithError(err)
				return
			}
			if resp.Error != nil {
				log.Printf("recognition error: %v", resp.Error)
				continue
			}
			for _, result := range resp.Results {
				if len(result.Alternatives) == 0 {
					continue // nothing to report for this result
				}
				c.ch <- result.Alternatives[0].Transcript
			}
		}
	}()

	return c, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment