Get deps
go get ./...
Build the binary (or skip this step and use `go run`; see below)
go build
Run
echo "YOUR PROMPT" | ./lgent
Or
echo "YOUR PROMPT" | go run ./...
module lgent | |
go 1.22.1 | |
require ( | |
github.com/faiface/beep v1.1.0 | |
github.com/taigrr/elevenlabs v0.0.11 | |
github.com/tmc/langchaingo v0.1.7 | |
) | |
require ( | |
github.com/dlclark/regexp2 v1.10.0 // indirect | |
github.com/google/uuid v1.6.0 // indirect | |
github.com/hajimehoshi/go-mp3 v0.3.4 // indirect | |
github.com/hajimehoshi/oto v1.0.1 // indirect | |
github.com/pkg/errors v0.9.1 // indirect | |
github.com/pkoukk/tiktoken-go v0.1.6 // indirect | |
golang.org/x/exp/shiny v0.0.0-20230905200255-921286631fa9 // indirect | |
golang.org/x/image v0.12.0 // indirect | |
golang.org/x/mobile v0.0.0-20230906132913-2077a3224571 // indirect | |
golang.org/x/sys v0.16.0 // indirect | |
) |
package main | |
import ( | |
"bufio" | |
"bytes" | |
"context" | |
"errors" | |
"io" | |
"log" | |
"os" | |
"sync" | |
"time" | |
"github.com/faiface/beep" | |
"github.com/faiface/beep/mp3" | |
"github.com/faiface/beep/speaker" | |
"github.com/taigrr/elevenlabs/client" | |
"github.com/taigrr/elevenlabs/client/types" | |
"github.com/tmc/langchaingo/llms" | |
"github.com/tmc/langchaingo/llms/ollama" | |
) | |
const MaxChunkSize = 1000 | |
var opts = types.SynthesisOptions{Stability: 0.75, SimilarityBoost: 0.75} | |
// Sentinel errors shared by the buffer and the worker goroutines.
var (
	// ErrBufferFull is returned by Buffer.Write when the buffer has no room
	// left, or has just been filled up to its maximum size.
	ErrBufferFull = errors.New("buffer is already full")
	// ErrDoneSound is returned by the LLM streaming callback once the done
	// channel has fired.
	ErrDoneSound = errors.New("done playing sound")
)

// Buffer is a bytes.Buffer with an upper bound on how many bytes it accepts.
type Buffer struct {
	buffer  *bytes.Buffer
	maxSize int
}

// NewFixedSizeBuffer returns an empty Buffer backed by storage pre-allocated
// for maxSize bytes.
func NewFixedSizeBuffer(maxSize int) *Buffer {
	return &Buffer{
		buffer:  bytes.NewBuffer(make([]byte, 0, maxSize)),
		maxSize: maxSize,
	}
}

// Write stores as much of p as still fits and returns the number of bytes
// stored. It returns ErrBufferFull when nothing fits, and also when this
// write brings the buffer to its maximum size; in the latter case the
// returned count may be smaller than len(p).
func (fb *Buffer) Write(p []byte) (int, error) {
	room := fb.buffer.Available()
	if room == 0 {
		return 0, ErrBufferFull
	}

	// Drop whatever does not fit; the caller learns how much was kept from
	// the returned count.
	accepted := p
	if len(accepted) > room {
		accepted = accepted[:room]
	}

	written, err := fb.buffer.Write(accepted)
	switch {
	case err != nil:
		return written, err
	case fb.buffer.Len() == fb.maxSize:
		return written, ErrBufferFull
	default:
		return written, nil
	}
}

// Reset empties the buffer so it can be filled again.
func (fb *Buffer) Reset() {
	fb.buffer.Reset()
}

// String returns the buffered bytes as a string.
func (fb *Buffer) String() string {
	return fb.buffer.String()
}
// stream audio from elevenlabs using the first voice we found | |
func TTS(ctx context.Context, c client.Client, pw *io.PipeWriter, voiceId string, chunks <-chan []byte, errCh chan<- error, done chan bool) { | |
defer pw.Close() | |
chunkBuf := NewFixedSizeBuffer(MaxChunkSize) | |
for { | |
select { | |
case chunk := <-chunks: | |
if len(chunk) == 0 { | |
ttsErr := c.TTSStream(ctx, pw, chunkBuf.String(), voiceId, opts) | |
if ttsErr != nil { | |
select { | |
case errCh <- ttsErr: | |
return | |
case <-done: | |
return | |
} | |
} | |
return | |
} | |
n, err := chunkBuf.Write(chunk) | |
if err != nil { | |
if err == ErrBufferFull { | |
if err := c.TTSStream(ctx, pw, chunkBuf.String(), voiceId, opts); err != nil { | |
select { | |
case errCh <- err: | |
return | |
case <-done: | |
return | |
} | |
} | |
chunkBuf.Reset() | |
if _, err := buf.Write(chunk[n:]); err != nil { | |
select { | |
case errCh <- err: | |
return | |
case <-done: | |
return | |
} | |
} | |
continue | |
} | |
select { | |
case errCh <- err: | |
return | |
case <-done: | |
return | |
} | |
} | |
case <-done: | |
return | |
} | |
} | |
} | |
func LLM(ctx context.Context, llm *ollama.LLM, prompt string, chunks chan<- []byte, errCh chan<- error, done chan bool) { | |
_, err := llm.Call(ctx, prompt, | |
llms.WithTemperature(0.8), | |
llms.WithStreamingFunc(func(ctx context.Context, chunk []byte) error { | |
select { | |
case <-done: | |
return ErrDoneSound | |
case chunks <- chunk: | |
return nil | |
} | |
}), | |
) | |
if err != nil { | |
select { | |
case errCh <- err: | |
case <-done: | |
} | |
} | |
} | |
func main() { | |
client := client.New(os.Getenv("XI_API_KEY")) | |
ctx := context.Background() | |
ids, err := client.GetVoiceIDs(ctx) | |
if err != nil { | |
log.Fatal("Failed fetching voices from 11Labs", err) | |
} | |
llm, err := ollama.New(ollama.WithModel("llama2")) | |
if err != nil { | |
log.Fatal("Failed creating LLM client: ", err) | |
} | |
reader := bufio.NewReader(os.Stdin) | |
prompt, _ := reader.ReadString('\n') | |
pipeReader, pipeWriter := io.Pipe() | |
errChan := make(chan error, 1) | |
doneSound := make(chan bool) | |
donePrg := make(chan bool) | |
llmChunks := make(chan []byte, 1000) | |
var wg sync.WaitGroup | |
wg.Add(1) | |
go func() { | |
defer wg.Done() | |
log.Println("Starting TTS worker") | |
TTS(ctx, client, pipeWriter, ids[0], llmChunks, errChan, donePrg) | |
}() | |
wg.Add(1) | |
go func() { | |
defer wg.Done() | |
log.Println("Starting LLM worker") | |
LLM(ctx, llm, prompt, llmChunks, errChan, donePrg) | |
}() | |
streamer, format, err := mp3.Decode(pipeReader) | |
if err != nil { | |
log.Fatal("failed to initialize MP3 decoder: ", err) | |
} | |
defer streamer.Close() | |
if err := speaker.Init(format.SampleRate, format.SampleRate.N(time.Second/10)); err != nil { | |
log.Fatal("Failed to initialized speaker: ", err) | |
} | |
speaker.Play(beep.Seq(streamer, beep.Callback(func() { | |
select { | |
case doneSound <- true: | |
case <-donePrg: | |
} | |
}))) | |
select { | |
case <-doneSound: | |
log.Println("Done streaming") | |
case err := <-errChan: | |
log.Printf("LLM stream error: %v", err) | |
} | |
close(donePrg) | |
wg.Wait() | |
} |