Skip to content

Instantly share code, notes, and snippets.

@imjasonh
Last active October 28, 2021 06:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save imjasonh/78c22b36944b8ec189456e67e63bfaa4 to your computer and use it in GitHub Desktop.
Save imjasonh/78c22b36944b8ec189456e67e63bfaa4 to your computer and use it in GitHub Desktop.
Text-to-Speech-to-Text-to-Speech ad infinitum
package main
import (
"context"
"errors"
"flag"
"fmt"
"log"
"math/rand"
"os"
"time"
"golang.org/x/oauth2"
"google.golang.org/api/option"
"google.golang.org/api/transport"
speech "cloud.google.com/go/speech/apiv1"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
ttspb "google.golang.org/genproto/googleapis/cloud/texttospeech/v1beta1"
)
// address is the host:port endpoint main dials to reach the Text-to-Speech
// server.
const address = "texttospeech.googleapis.com:443"
// tok is the OAuth access token used to authenticate the Text-to-Speech
// connection.
var tok = flag.String("tok", "", "OAuth token")

// say is the first statement to be synthesized.
var say = flag.String("say", "hello world", "Seed statement")

// n is the number of speak/listen iterations to run.
var n = flag.Int("n", 10, "Iterations")
func main() {
flag.Parse()
rand.Seed(time.Now().Unix())
ctx := context.Background()
// Set up a connection to the TTS server.
conn, err := transport.DialGRPC(ctx,
option.WithEndpoint(address),
option.WithTokenSource(oauth2.StaticTokenSource(&oauth2.Token{AccessToken: *tok})))
if err != nil {
log.Fatalf("Dial: %v", err)
}
defer conn.Close()
// Set up connection to Speech server.
sc, err := speech.NewClient(ctx)
if err != nil {
log.Fatalf("NewClient: %v", err)
}
defer sc.Close()
p := &partier{
tts: ttspb.NewTextToSpeechClient(conn),
rec: sc,
say: *say,
n: *n,
}
if err := p.partyOn(ctx); err != nil {
log.Fatal(err)
}
}
// partier holds the service clients and the evolving state of one
// speak/listen run.
type partier struct {
	// tts synthesizes text into audio; rec transcribes audio back into text.
	tts ttspb.TextToSpeechClient
	rec *speech.Client

	// say is the text synthesized on the next iteration; partyOn overwrites it
	// with each transcript.
	say string
	// lang is the language code sent with synthesis and recognition requests.
	lang string
	// n is the number of iterations partyOn runs.
	n int
}
// partyOn runs the speak/listen loop p.n times, starting in en-US.
//
// Each iteration synthesizes p.say in the current language, writes the audio
// to a step-<i>.wav file, and switches to another language. On every iteration
// but the last it then transcribes that audio in the new language and stores
// the transcript in p.say for the next round.
//
// The first failure aborts the loop. The returned error is prefixed with the
// failing stage ("TTS", "Write" or "Recognize") and wraps the underlying
// error, so callers can still inspect it with errors.Is and errors.As.
func (p *partier) partyOn(ctx context.Context) error {
	p.lang = "en-US"
	fmt.Printf("%s said: %s\n", icon[p.lang], p.say)
	for i := 0; i < p.n; i++ {
		audio, err := p.speak(ctx)
		if err != nil {
			return fmt.Errorf("TTS: %w", err)
		}
		if err := p.write(i, audio); err != nil {
			return fmt.Errorf("Write: %w", err)
		}

		// Switch languages before listening, so the audio is transcribed in a
		// different language from the one it was spoken in.
		p.nextLang()

		// The last clip is never spoken again, so it needs no transcript.
		if i == p.n-1 {
			continue
		}
		heard, err := p.listen(ctx, audio)
		if err != nil {
			return fmt.Errorf("Recognize: %w", err)
		}
		p.say = heard
	}
	return nil
}
// icon maps each supported language code to the flag emoji printed next to
// what was said or heard in that language. Its key set is also the pool of
// languages nextLang chooses from.
//
// Each flag is a pair of Unicode regional-indicator symbols, written as
// escapes so the values survive tools that re-encode the source file.
var icon = map[string]string{
	"fr-FR": "\U0001F1EB\U0001F1F7", // France
	"de-DE": "\U0001F1E9\U0001F1EA", // Germany
	"fr-CA": "\U0001F1E8\U0001F1E6", // Canada
	"en-GB": "\U0001F1EC\U0001F1E7", // United Kingdom
	"en-US": "\U0001F1FA\U0001F1F8", // United States
	"es-ES": "\U0001F1EA\U0001F1F8", // Spain
}
// nextLang switches p.lang to a language code from icon other than the current
// one and returns the new code. The choice is the first differing key that the
// map iteration yields, so it varies between calls. It panics if icon holds no
// key other than p.lang.
func (p *partier) nextLang() string {
	for code := range icon {
		if code == p.lang {
			continue
		}
		p.lang = code
		return p.lang
	}
	panic("unreachable")
}
// speak synthesizes p.say in the language p.lang and returns the audio bytes
// from the Text-to-Speech response, or the error returned by the service call.
//
// The audio is requested as LINEAR16 at 24000 Hz.
func (p *partier) speak(ctx context.Context) ([]byte, error) {
	req := &ttspb.SynthesizeSpeechRequest{
		Input: &ttspb.SynthesisInput{
			InputSource: &ttspb.SynthesisInput_Text{Text: p.say},
		},
		Voice: &ttspb.VoiceSelectionParams{
			LanguageCode: p.lang,
		},
		AudioConfig: &ttspb.AudioConfig{
			AudioEncoding: ttspb.AudioEncoding_LINEAR16,
			// Pin the rate rather than taking the selected voice's default:
			// listen declares the audio to the recognizer as 24000 Hz, so the
			// two requests must agree.
			SampleRateHertz: 24000,
		},
	}
	resp, err := p.tts.SynthesizeSpeech(ctx, req)
	if err != nil {
		return nil, err
	}
	return resp.GetAudioContent(), nil
}
// write stores b in the file step-<i>.wav in the current directory, creating
// the file or truncating an existing one.
//
// It returns the first error from creating, writing or closing the file.
func (p *partier) write(i int, b []byte) error {
	f, err := os.Create(fmt.Sprintf("step-%d.wav", i))
	if err != nil {
		return err
	}
	if _, err := f.Write(b); err != nil {
		// The write error is the one worth reporting; closing is best effort.
		_ = f.Close()
		return err
	}
	// Close can report a write failure that surfaced late, so its error must
	// not be dropped.
	return f.Close()
}
// listen sends audio to the Speech service for recognition in the language
// p.lang, declaring it as LINEAR16 at 24000 Hz. It prints and returns the
// transcript of the first alternative of the first result.
//
// It returns the service error unchanged, or a "no speech detected" error when
// the response has no results or the first result has no alternatives.
func (p *partier) listen(ctx context.Context, audio []byte) (string, error) {
	req := &speechpb.RecognizeRequest{
		Config: &speechpb.RecognitionConfig{
			Encoding:        speechpb.RecognitionConfig_LINEAR16,
			SampleRateHertz: 24000,
			LanguageCode:    p.lang,
		},
		Audio: &speechpb.RecognitionAudio{
			AudioSource: &speechpb.RecognitionAudio_Content{Content: audio},
		},
	}
	resp, err := p.rec.Recognize(ctx, req)
	if err != nil {
		return "", err
	}

	results := resp.GetResults()
	if len(results) == 0 {
		return "", errors.New("no speech detected")
	}
	alternatives := results[0].GetAlternatives()
	if len(alternatives) == 0 {
		return "", errors.New("no speech detected")
	}

	transcript := alternatives[0].GetTranscript()
	fmt.Printf("%s heard: %s\n", icon[p.lang], transcript)
	return transcript, nil
}
🇺🇸 said: Mairzy doats and dozy doats and liddle lamzy divey
🇪🇸 heard: besito besito besito besito
🇨🇦 heard: ben si tu veux c'est toi merci tomber si tard
🇫🇷 heard: ben si tu veux c'est toi merci tomber si tard
🇺🇸 heard: 2206 Wyman Street in Bay City
🇪🇸 heard: cita centro salud sexual Madrid
🇺🇸 heard: cheetah Central Saloon sexual married
🇫🇷 heard: si la ceinture Saint-Cyr-sur-Mer
🇪🇸 heard: fina Sánchez Sánchez
🇨🇦 heard: Innocenti Center
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment