@ntuaha
Created September 19, 2019 04:37
import pyaudio
import wave
from array import array
import subprocess
import numpy
import soundfile as sf
import io
import base64
import requests
import json
from AI_ESB import ai
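
# Dependency note (added here, inferred from the imports above):
#   pip install pyaudio soundfile numpy requests
# AI_ESB is assumed to be the author's own module providing ai.talk();
# it is not a PyPI package.
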
FORMAT = pyaudio.paInt16    # 16-bit signed samples
CHANNELS = 1                # mono
RATE = 16000                # 16 kHz sample rate
CHUNK = int(RATE / 2)       # read half a second of audio per buffer
RECORD_SECONDS = 15
THRESHOLD = 5000            # amplitude above which the input is treated as speech


def main():
    # recording prerequisites
    talk()
    # end of recording


def google_stt(audio):
    # base64-encode the FLAC bytes so they can be embedded in the JSON payload
    g = base64.b64encode(audio)
    data = '{"config": { "encoding":"FLAC","sampleRateHertz":16000,"languageCode":"cmn-Hant-TW"},"audio": { "content": "%s" }}' % g.decode('utf-8')
    url = "https://speech.googleapis.com/v1/speech:recognize"
    querystring = {"key": "YOUR_API_KEY"}  # fill in your own key here
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "POST", url, data=data, headers=headers, params=querystring)
    ans = json.loads(response.text)
    text = ''
    if ans == {}:
        return text
    # the response holds a list of results, each with candidate alternatives;
    # concatenate every transcript into a single string
    for alt in ans['results']:
        for item in alt['alternatives']:
            text = text + item['transcript']
            print(text)
    return text
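

# A hedged alternative to google_stt() (an addition, not in the original gist):
# build the request body as a dict and let requests serialize it, rather than
# interpolating the base64 string into a hand-written JSON template.
def google_stt_json(audio, api_key):
    payload = {
        "config": {"encoding": "FLAC", "sampleRateHertz": 16000,
                   "languageCode": "cmn-Hant-TW"},
        "audio": {"content": base64.b64encode(audio).decode("utf-8")},
    }
    response = requests.post("https://speech.googleapis.com/v1/speech:recognize",
                             json=payload, params={"key": api_key})
    ans = response.json()
    text = ''
    for alt in ans.get('results', []):
        for item in alt['alternatives']:
            text += item['transcript']
    return text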


def t2s(text):
    file = "output2.mp3"
    url = "https://translate.google.com/translate_tts"
    querystring = {"ie": "UTF-8", "total": "1", "idx": "0",
                   "textlen": "128", "client": "tw-ob", "q": text, "tl": "zh-TW"}
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "GET", url, headers=headers, params=querystring, stream=True)
    with open(file, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    # subprocess.call('/usr/bin/play %s' % file, shell=True)
    subprocess.call(['/usr/bin/afplay', file])  # afplay is macOS-only; see the sketch below
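

# An optional cross-platform playback helper (an addition, not in the original
# gist): /usr/bin/afplay exists only on macOS, so this falls back to SoX's
# `play` elsewhere, assuming SoX is installed. t2s() could call play_file(file)
# instead of hard-coding afplay.
def play_file(path):
    import sys
    if sys.platform == 'darwin':
        subprocess.call(['/usr/bin/afplay', path])
    else:
        subprocess.call(['play', path])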


def talk():
    # starting recording
    while True:
        # try:
        audio = pyaudio.PyAudio()  # instantiate PyAudio
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        frames = []
        status = 'ready'
        while True:
            data = stream.read(CHUNK)
            data_chunk = array('h', data)
            vol = max(data_chunk)
            print(vol)
            if vol >= THRESHOLD and status == 'ready':
                # first loud chunk: start keeping audio
                print("something said")
                frames.append(data)
                status = 'recording'
                print(status)
            elif status == 'ready':
                # still quiet: keep only the most recent chunk as pre-roll
                frames = [data]
            elif vol >= THRESHOLD and status == 'recording':
                frames.append(data)
            elif vol < THRESHOLD and status == 'recording':
                # volume dropped below the threshold: stop recording
                status = 'stop'
                frames.append(data)
                print(status)
                break
            else:
                print("nothing")
        print('save...\n')
        # pull the raw wave buffer into a numpy array
        decoded = numpy.frombuffer(b''.join(frames), dtype=numpy.int16)
        flac_buffer = io.BytesIO()
        # convert the numpy samples into FLAC bytes in memory
        sf.write(flac_buffer, decoded, samplerate=RATE,
                 format="FLAC", subtype="PCM_16")
        print('save...\n')
        google_text = google_stt(flac_buffer.getvalue())
        if google_text != '':
            t = ai.talk(google_text)
            # print(t)
            t2s(t)
        else:
            print("no response QQ")
        stream.stop_stream()
        stream.close()
        audio.terminate()
        # except:
        #     print('terminal...\n')
        #     break
if __name__ == "__main__":
main()