ericlewis/.directions.md

## .directions.md

      
    Raw
  

              .directions.md
            
          
    Prerequisites


An OpenAI API key.
A Picovoice API key.
An ElevenLabs API key.
mpg123 installed and available on your PATH.
Node.js 18 or newer.

Directions


git clone https://gist.github.com/ericlewis/ccd3f0b7a17fcbe2473121a473082c8f
cd ccd3f0b7a17fcbe2473121a473082c8f
Edit .env and fill in your API keys.
npm i
npm run start
Say "Jarvis" to wake the assistant, then talk.


## .env
OPENAI="YOUR_OPENAI_KEY_HERE"
PORCUPINE="YOUR_PICOVOICE_KEY_HERE"
VOICE_KEY="YOUR_ELEVENLABS_KEY_HERE"

VOICE_URI="https://api.elevenlabs.io/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL/stream"
SYSTEM_MESSAGE="You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible."
MODEL="gpt-3.5-turbo"

## index.js
import Listr from "listr";
import { Porcupine, BuiltinKeyword } from "@picovoice/porcupine-node";
import { PvRecorder } from "@picovoice/pvrecorder-node";
import VAD from "node-vad";
import { Configuration, OpenAIApi } from "openai";
import wavConverter from "wav-converter";
import { Readable, pipeline } from "node:stream";
import { spawn } from "child_process";
import { promisify } from "util";
const pipelineAsync = promisify(pipeline);
import * as dotenv from "dotenv";

// Pull the values from .env into process.env before any client reads them.
dotenv.config();
const { env } = process;

// OpenAI client used for both transcription and chat completion.
const openaiConfig = new Configuration({ apiKey: env.OPENAI });
const openai = new OpenAIApi(openaiConfig);

// Wake-word engine: one built-in keyword, paired with one sensitivity value.
const wakeKeywords = [BuiltinKeyword.JARVIS];
const wakeSensitivities = [0.95];
const porcupine = new Porcupine(env.PORCUPINE, wakeKeywords, wakeSensitivities);

// Voice-activity detector consulted while recording the user's request.
const vad = new VAD(VAD.Mode.NORMAL);

// Recorder frames are sized to what Porcupine consumes per process() call.
// NOTE(review): -1 is presumably "use the default input device" — confirm against the PvRecorder docs.
const recorder = new PvRecorder(-1, porcupine.frameLength);

/**
 * Build a chat message object.
 * @param {string} role - Message role (the file uses "system" and "user").
 * @param {string} content - Message text.
 * @returns {{ role: string, content: string }}
 */
function makeMessage(role, content) {
  return { role, content };
}

// Running conversation; seeded with the system prompt and reassigned after every exchange.
let history = [makeMessage("system", env.SYSTEM_MESSAGE)];

/**
 * Listr task definitions ({ title, task }) that make up one conversation turn.
 * They are consumed in declaration order via Object.values(subtasks) and share
 * data through the Listr context object (`ctx`):
 *   recordingTask     writes ctx.audio      (Buffer of recorded audio bytes)
 *   transcriptionTask writes ctx.transcript (text from the transcription call)
 *   inferrenceTask    writes ctx.response   (chat message returned by the model)
 *   speechTask        reads  ctx.response.content and plays it through mpg123
 * Each task also rewrites its own `task.title` to report what it did.
 */
const subtasks = {
  recordingTask: {
    title: "Recording",
    task: async (ctx, task) => {
      // Bytes to accumulate before the VAD is consulted for the first time.
      // The threshold is carried over unchanged; its rationale is not documented.
      const minBytes = 50000 * 1.5;
      let audioBuffer = Buffer.from([]);
      // A flat loop instead of per-frame recursion: the recording ends at the
      // first SILENCE verdict and the VAD runs exactly once per frame past the
      // threshold (the recursive form re-ran it at every unwinding stack level).
      for (;;) {
        const audioFrame = await recorder.read();
        // Honour the view's offset/length so only this frame's bytes are copied.
        const frameBytes = Buffer.from(
          audioFrame.buffer,
          audioFrame.byteOffset,
          audioFrame.byteLength,
        );
        audioBuffer = Buffer.concat([audioBuffer, frameBytes]);
        if (audioBuffer.length < minBytes) continue;
        // The VAD is handed everything recorded so far, not just the last frame.
        const res = await vad.processAudio(audioBuffer, porcupine.sampleRate);
        if (res === VAD.Event.SILENCE) break;
      }
      ctx.audio = audioBuffer;
      recorder.stop();
      // Buffer#length is a byte count, so report it as bytes.
      task.title = `Captured ${audioBuffer.length} bytes of audio.`;
    },
  },
  transcriptionTask: {
    title: "Transcribing",
    task: async (ctx, task) => {
      const audioReadStream = Readable.from(wavConverter.encodeWav(ctx.audio));
      // NOTE(review): presumably the upload needs a filename to label the part — confirm.
      audioReadStream.path = "conversation.wav";
      const {
        data: { text },
      } = await openai.createTranscription(audioReadStream, "whisper-1");
      ctx.transcript = text;
      task.title = text;
    },
  },
  inferrenceTask: {
    title: "Inferring",
    task: async (ctx, task) => {
      const newHistory = [...history, makeMessage("user", ctx.transcript)];
      const request = { messages: newHistory, model: env.MODEL };
      const { data } = await openai.createChatCompletion(request);
      ctx.response = data.choices[0].message;
      // Commit the user turn and the reply only after the request succeeded.
      history = [...newHistory, ctx.response];
      task.title = ctx.response.content;
    },
  },
  speechTask: {
    title: "Speaking",
    task: async (ctx, task) => {
      const response = await fetch(env.VOICE_URI, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "xi-api-key": env.VOICE_KEY,
        },
        body: JSON.stringify({ text: ctx.response.content }),
      });
      // Fail loudly rather than piping an error payload into the audio player.
      if (!response.ok) {
        throw new Error(`Speech request failed with HTTP status ${response.status}`);
      }
      const mpg123 = spawn("mpg123", ["-"]);
      // Subscribe before streaming so neither "close" nor a spawn "error"
      // (e.g. mpg123 not installed) can fire unobserved.
      const playerExited = new Promise((resolve, reject) => {
        mpg123.once("error", reject);
        mpg123.once("close", resolve);
      });
      // Resolves once the audio is fully written AND the player has exited;
      // rejects if either side fails, so the task can no longer hang forever.
      await Promise.all([pipelineAsync(response.body, mpg123.stdin), playerExited]);
      task.title = "Finished speaking";
    },
  },
};

/**
 * Listr task that starts the recorder, blocks until Porcupine reports a wake
 * word, and then returns a nested Listr built from every entry of `subtasks`
 * so the rest of the conversation turn runs as child tasks.
 */
const wakeWordTask = {
  title: "Listening",
  task: async (_ctx, task) => {
    recorder.start();
    // Per the Porcupine Node SDK docs, process() returns the index of the
    // detected keyword, or -1 when the frame contains none. Compare against -1
    // explicitly instead of relying on index 0 being falsy, which only works
    // while exactly one keyword is registered.
    let keywordIndex = -1;
    while (keywordIndex === -1) {
      keywordIndex = porcupine.process(await recorder.read());
    }
    task.title = "'Jarvis' detected.";
    return new Listr(Object.values(subtasks));
  },
};

// Run forever: every pass builds a fresh Listr around wakeWordTask and waits
// for it (and whatever nested tasks it returns) to finish before listening again.
for (;;) {
  const cycle = new Listr([wakeWordTask], { collapse: false });
  await cycle.run();
}

## package.json
{
  "name": "assistant",
  "version": "1.0.0",
  "description": "a conversational chatbot",
  "main": "index.js",
  "type": "module",
  "scripts": {
    "start": "node index.js"
  },
  "author": "Eric Lewis",
  "license": "MIT",
  "dependencies": {
    "@picovoice/porcupine-node": "^2.1.7",
    "@picovoice/pvrecorder-node": "^1.1.2",
    "dotenv": "^16.0.3",
    "listr": "^0.14.3",
    "node-vad": "^1.1.4",
    "openai": "^3.2.1",
    "wav-converter": "^1.0.0"
  }
}
	OPENAI="YOUR_OPENAI_KEY_HERE"
	PORCUPINE="YOUR_PICOVOICE_KEY_HERE"
	VOICE_KEY="YOUR_ELEVENLABS_KEY_HERE"

	VOICE_URI="https://api.elevenlabs.io/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL/stream"
	SYSTEM_MESSAGE="You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible."
	MODEL="gpt-3.5-turbo"
	import Listr from "listr";
	import { Porcupine, BuiltinKeyword } from "@picovoice/porcupine-node";
	import { PvRecorder } from "@picovoice/pvrecorder-node";
	import VAD from "node-vad";
	import { Configuration, OpenAIApi } from "openai";
	import wavConverter from "wav-converter";
	import { Readable, pipeline } from "node:stream";
	import { spawn } from "child_process";
	import { promisify } from "util";
	const pipelineAsync = promisify(pipeline);
	import * as dotenv from "dotenv";

	// Pull the values from .env into process.env before any client reads them.
	dotenv.config();
	const { env } = process;

	// OpenAI client used for both transcription and chat completion.
	const openaiConfig = new Configuration({ apiKey: env.OPENAI });
	const openai = new OpenAIApi(openaiConfig);

	// Wake-word engine: one built-in keyword, paired with one sensitivity value.
	const wakeKeywords = [BuiltinKeyword.JARVIS];
	const wakeSensitivities = [0.95];
	const porcupine = new Porcupine(env.PORCUPINE, wakeKeywords, wakeSensitivities);

	// Voice-activity detector consulted while recording the user's request.
	const vad = new VAD(VAD.Mode.NORMAL);

	// Recorder frames are sized to what Porcupine consumes per process() call.
	// NOTE(review): -1 is presumably "use the default input device" — confirm against the PvRecorder docs.
	const recorder = new PvRecorder(-1, porcupine.frameLength);

	/**
	 * Build a chat message object.
	 * @param {string} role - Message role (the file uses "system" and "user").
	 * @param {string} content - Message text.
	 * @returns {{ role: string, content: string }}
	 */
	function makeMessage(role, content) {
	  return { role, content };
	}

	// Running conversation; seeded with the system prompt and reassigned after every exchange.
	let history = [makeMessage("system", env.SYSTEM_MESSAGE)];

	/**
	 * Listr task definitions ({ title, task }) that make up one conversation turn.
	 * They are consumed in declaration order via Object.values(subtasks) and share
	 * data through the Listr context object (`ctx`):
	 *   recordingTask     writes ctx.audio      (Buffer of recorded audio bytes)
	 *   transcriptionTask writes ctx.transcript (text from the transcription call)
	 *   inferrenceTask    writes ctx.response   (chat message returned by the model)
	 *   speechTask        reads  ctx.response.content and plays it through mpg123
	 * Each task also rewrites its own `task.title` to report what it did.
	 */
	const subtasks = {
	  recordingTask: {
	    title: "Recording",
	    task: async (ctx, task) => {
	      // Bytes to accumulate before the VAD is consulted for the first time.
	      // The threshold is carried over unchanged; its rationale is not documented.
	      const minBytes = 50000 * 1.5;
	      let audioBuffer = Buffer.from([]);
	      // A flat loop instead of per-frame recursion: the recording ends at the
	      // first SILENCE verdict and the VAD runs exactly once per frame past the
	      // threshold (the recursive form re-ran it at every unwinding stack level).
	      for (;;) {
	        const audioFrame = await recorder.read();
	        // Honour the view's offset/length so only this frame's bytes are copied.
	        const frameBytes = Buffer.from(
	          audioFrame.buffer,
	          audioFrame.byteOffset,
	          audioFrame.byteLength,
	        );
	        audioBuffer = Buffer.concat([audioBuffer, frameBytes]);
	        if (audioBuffer.length < minBytes) continue;
	        // The VAD is handed everything recorded so far, not just the last frame.
	        const res = await vad.processAudio(audioBuffer, porcupine.sampleRate);
	        if (res === VAD.Event.SILENCE) break;
	      }
	      ctx.audio = audioBuffer;
	      recorder.stop();
	      // Buffer#length is a byte count, so report it as bytes.
	      task.title = `Captured ${audioBuffer.length} bytes of audio.`;
	    },
	  },
	  transcriptionTask: {
	    title: "Transcribing",
	    task: async (ctx, task) => {
	      const audioReadStream = Readable.from(wavConverter.encodeWav(ctx.audio));
	      // NOTE(review): presumably the upload needs a filename to label the part — confirm.
	      audioReadStream.path = "conversation.wav";
	      const {
	        data: { text },
	      } = await openai.createTranscription(audioReadStream, "whisper-1");
	      ctx.transcript = text;
	      task.title = text;
	    },
	  },
	  inferrenceTask: {
	    title: "Inferring",
	    task: async (ctx, task) => {
	      const newHistory = [...history, makeMessage("user", ctx.transcript)];
	      const request = { messages: newHistory, model: env.MODEL };
	      const { data } = await openai.createChatCompletion(request);
	      ctx.response = data.choices[0].message;
	      // Commit the user turn and the reply only after the request succeeded.
	      history = [...newHistory, ctx.response];
	      task.title = ctx.response.content;
	    },
	  },
	  speechTask: {
	    title: "Speaking",
	    task: async (ctx, task) => {
	      const response = await fetch(env.VOICE_URI, {
	        method: "POST",
	        headers: {
	          "Content-Type": "application/json",
	          "xi-api-key": env.VOICE_KEY,
	        },
	        body: JSON.stringify({ text: ctx.response.content }),
	      });
	      // Fail loudly rather than piping an error payload into the audio player.
	      if (!response.ok) {
	        throw new Error(`Speech request failed with HTTP status ${response.status}`);
	      }
	      const mpg123 = spawn("mpg123", ["-"]);
	      // Subscribe before streaming so neither "close" nor a spawn "error"
	      // (e.g. mpg123 not installed) can fire unobserved.
	      const playerExited = new Promise((resolve, reject) => {
	        mpg123.once("error", reject);
	        mpg123.once("close", resolve);
	      });
	      // Resolves once the audio is fully written AND the player has exited;
	      // rejects if either side fails, so the task can no longer hang forever.
	      await Promise.all([pipelineAsync(response.body, mpg123.stdin), playerExited]);
	      task.title = "Finished speaking";
	    },
	  },
	};

	/**
	 * Listr task that starts the recorder, blocks until Porcupine reports a wake
	 * word, and then returns a nested Listr built from every entry of `subtasks`
	 * so the rest of the conversation turn runs as child tasks.
	 */
	const wakeWordTask = {
	  title: "Listening",
	  task: async (_ctx, task) => {
	    recorder.start();
	    // Per the Porcupine Node SDK docs, process() returns the index of the
	    // detected keyword, or -1 when the frame contains none. Compare against -1
	    // explicitly instead of relying on index 0 being falsy, which only works
	    // while exactly one keyword is registered.
	    let keywordIndex = -1;
	    while (keywordIndex === -1) {
	      keywordIndex = porcupine.process(await recorder.read());
	    }
	    task.title = "'Jarvis' detected.";
	    return new Listr(Object.values(subtasks));
	  },
	};

	// Run forever: every pass builds a fresh Listr around wakeWordTask and waits
	// for it (and whatever nested tasks it returns) to finish before listening again.
	for (;;) {
	  const cycle = new Listr([wakeWordTask], { collapse: false });
	  await cycle.run();
	}
	{
	"name": "assistant",
	"version": "1.0.0",
	"description": "a conversational chatbot",
	"main": "index.js",
	"type": "module",
	"scripts": {
	"start": "node index.js"
	},
	"author": "Eric Lewis",
	"license": "MIT",
	"dependencies": {
	"@picovoice/porcupine-node": "^2.1.7",
	"@picovoice/pvrecorder-node": "^1.1.2",
	"dotenv": "^16.0.3",
	"listr": "^0.14.3",
	"node-vad": "^1.1.4",
	"openai": "^3.2.1",
	"wav-converter": "^1.0.0"
	}
	}