Gist by @josejuan (last active March 5, 2023 21:33)
# python mic.py --access_key ${ACCESS_KEY} --keywords picovoice
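# python mic.py --show_audio_devices            # list capture devices and their indexes
#
# Requirements (used below, not created by the script):
#   - OPENAI_KEY environment variable with an OpenAI API key
#   - a Picovoice AccessKey from https://console.picovoice.ai/ (passed via --access_key)
#   - an ./audio/ directory where the recorded WAV and generated MP3 files are written
#   - /home/josejuan/Downloads/laser.wav (acknowledgement beep) and mplayer on the PATH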
# standard library
import argparse
import array
import io
import logging
import os
import re
import struct
import subprocess
import time
import wave
from datetime import datetime
from threading import Thread

# third-party
import openai
import pvporcupine
import pygame.mixer
from gtts import gTTS
from pvrecorder import PvRecorder
whisperurl = 'https://api.openai.com/v1/audio/transcriptions'  # not used below; openai.Audio.transcribe is called instead
pygame.mixer.init()
sound = pygame.mixer.Sound('/home/josejuan/Downloads/laser.wav')  # acknowledgement beep
sound.play()  # startup chime
openai.api_key = os.getenv("OPENAI_KEY")
modelEngine = 'text-davinci-003'
# Prompt prelude (Spanish): "Be brief. Answer only the topic I ask about. Do not give additional information unless I ask for it."
prelude = 'Se breve. Responde únicamente el tópico que te pregunto. No aportes información adicional si no la pido. '
maxcontext = 4000   # keep at most this many characters of conversation context
recording = False   # toggled by the wake word; True while a question is being captured
class PorcupineDemo(Thread):
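    """
    Wake-word voice assistant loop.

    Listens with Porcupine for the keyword; each detection toggles recording.
    When recording stops, the buffered audio is sent to Whisper, the transcript
    is appended to the conversation context and answered with a Completion
    model, and the reply is spoken with gTTS through mplayer.
    """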
    def __init__(self, access_key, library_path, model_path, keyword_paths, sensitivities, input_device_index=None):
        super(PorcupineDemo, self).__init__()
        self._access_key = access_key
        self._library_path = library_path
        self._model_path = model_path
        self._keyword_paths = keyword_paths
        self._sensitivities = sensitivities
        self._input_device_index = input_device_index
    def run(self):
        global recording
        # Recover a printable keyword name from each .ppn file name.
        keywords = list()
        for x in self._keyword_paths:
            keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_')
            if len(keyword_phrase_part) > 6:
                keywords.append(' '.join(keyword_phrase_part[0:-6]))
            else:
                keywords.append(keyword_phrase_part[0])
        porcupine = pvporcupine.create(access_key=self._access_key, library_path=self._library_path, model_path=self._model_path, keyword_paths=self._keyword_paths, sensitivities=self._sensitivities)
        # Read half-length frames; two consecutive reads are concatenated below so that
        # Porcupine always processes a full frame, with 50% overlap between frames.
        recorder = PvRecorder(device_index=self._input_device_index, frame_length=porcupine.frame_length // 2)
        recorder.start()
        print('Using device: %s' % recorder.selected_device)
        print('Listening {')
        for keyword, sensitivity in zip(keywords, self._sensitivities):
            print('  %s (%.2f)' % (keyword, sensitivity))
        print('}')
        pcm_buffer = []
        pcmB = recorder.read()
        context = prelude
        # Silence the noisier loggers of the audio / wake-word libraries.
        logging.getLogger("pvporcupine").setLevel(logging.ERROR)
        logging.getLogger("PvRecorder").setLevel(logging.ERROR)
        logging.getLogger("pvaudio").setLevel(logging.ERROR)
        logging.getLogger("pvcommon").setLevel(logging.ERROR)
        n = 0
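        # Main loop: every Porcupine detection toggles `recording`. While recording,
        # half-frames are buffered; on the next detection the buffer is written to a
        # WAV file, transcribed with Whisper, answered with the completion model, and
        # the answer is spoken with gTTS via mplayer before listening resumes.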
        while True:
            pcmA = recorder.read()
            pcm = pcmB + pcmA   # previous half-frame + current half-frame = one full, overlapping frame
            pcmB = pcmA
            if recording:
                pcm_buffer.append(pcmA)
            result = porcupine.process(pcm)
            if result >= 0:
                sound.play()
                recording = not recording
                print('Recording ', recording)
                if not recording:
                    print('Sending data to Whisper')
                    recorder.stop()
                    name = "audio-%04d" % n
                    n += 1
                    namea = './audio/' + name + '-a.wav'
                    nameb = './audio/' + name + '-b.mp3'
                    # Write the captured 16 kHz, 16-bit mono PCM to a WAV file for transcription.
                    with wave.open(namea, 'wb') as archivo:
                        archivo.setparams((1, 2, 16000, 512, "NONE", "NONE"))
                        for chunk in pcm_buffer:
                            archivo.writeframes(struct.pack("h" * len(chunk), *chunk))
                    rs = openai.Audio.transcribe('whisper-1', open(namea, 'rb'))
                    # Drop everything from 'Kiko' onward (presumably the spoken phrase that closed the recording).
                    prompt = re.sub(r'Kiko.*$', '', rs.text)
                    print(">> ", prompt)
                    context += '\n\n' + prompt
                    cs = openai.Completion.create(engine=modelEngine, prompt=context, max_tokens=1024, n=1, stop=None, temperature=0.5)
                    rs = cs.choices[0].text.strip()
                    context += '\n\n' + rs
                    print('==============================')
                    print(context)
                    print('##############################')
                    # Keep only the most recent `maxcontext` characters of conversation.
                    if len(context) > maxcontext:
                        context = context[-maxcontext:]
                    # subprocess.call(['espeak-ng', '-v', 'es', cs.choices[0].text])
                    tts = gTTS(rs, lang='es', slow=False)
                    tts.save(nameb)
                    subprocess.call(['mplayer', '-speed', '1.25', nameb], stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
                    pcm_buffer = []
                    recorder.start()
    @classmethod
    def show_audio_devices(cls):
        devices = PvRecorder.get_audio_devices()
        for i in range(len(devices)):
            print('index: %d, device name: %s' % (i, devices[i]))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--access_key', help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
    parser.add_argument('--keywords', nargs='+', help='List of default keywords for detection. Available keywords: %s' % ', '.join(sorted(pvporcupine.KEYWORDS)), choices=sorted(pvporcupine.KEYWORDS), metavar='')
    parser.add_argument('--keyword_paths', nargs='+', help="Absolute paths to keyword model files. If not set, it will be populated from the `--keywords` argument")
    parser.add_argument('--library_path', help='Absolute path to dynamic library.', default=pvporcupine.LIBRARY_PATH)
    parser.add_argument('--model_path', help='Absolute path to the file containing model parameters.', default=pvporcupine.MODEL_PATH)
    parser.add_argument('--sensitivities', nargs='+', help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set, 0.5 will be used.", type=float, default=None)
    parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1)
    parser.add_argument('--show_audio_devices', action='store_true')
    args = parser.parse_args()
    if args.show_audio_devices:
        PorcupineDemo.show_audio_devices()
    else:
        if args.access_key is None:
            raise ValueError("AccessKey (--access_key) is required")
        if args.keyword_paths is None:
            if args.keywords is None:
                raise ValueError("Either `--keywords` or `--keyword_paths` must be set.")
            keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords]
        else:
            keyword_paths = args.keyword_paths
        if args.sensitivities is None:
            args.sensitivities = [0.5] * len(keyword_paths)
        if len(keyword_paths) != len(args.sensitivities):
            raise ValueError('Number of keywords does not match the number of sensitivities.')
        # run() is called directly rather than start(), so detection runs in the main thread.
        PorcupineDemo(
            access_key=args.access_key,
            library_path=args.library_path,
            model_path=args.model_path,
            keyword_paths=keyword_paths,
            sensitivities=args.sensitivities,
            input_device_index=args.audio_device_index).run()
if __name__ == '__main__':
    main()