@ntuaha
Created September 19, 2019 04:37
import pyaudio
import wave
from array import array
import subprocess
import numpy
import soundfile as sf
import io
import base64
import requests
import json
from AI_ESB import ai
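
# Dependency note (added here, inferred from the imports above):
#   pip install pyaudio soundfile numpy requests
# AI_ESB is assumed to be the author's own module providing ai.talk();
# it is not a PyPI package.
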
FORMAT = pyaudio.paInt16    # 16-bit signed samples
CHANNELS = 1                # mono
RATE = 16000                # 16 kHz sample rate
CHUNK = int(RATE / 2)       # read half a second of audio per buffer
RECORD_SECONDS = 15
THRESHOLD = 5000            # amplitude above which the input is treated as speech


def main():
    # recording prerequisites
    talk()
    # end of recording


def google_stt(audio):
    # base64-encode the FLAC bytes so they can be embedded in the JSON payload
    g = base64.b64encode(audio)
    data = '{"config": { "encoding":"FLAC","sampleRateHertz":16000,"languageCode":"cmn-Hant-TW"},"audio": { "content": "%s" }}' % g.decode('utf-8')
    url = "https://speech.googleapis.com/v1/speech:recognize"
    querystring = {"key": "YOUR_API_KEY"}  # fill in your own key here
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "POST", url, data=data, headers=headers, params=querystring)
    ans = json.loads(response.text)
    text = ''
    if ans == {}:
        return text
    # the response holds a list of results, each with candidate alternatives;
    # concatenate every transcript into a single string
    for alt in ans['results']:
        for item in alt['alternatives']:
            text = text + item['transcript']
            print(text)
    return text
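

# A hedged alternative to google_stt() (an addition, not in the original gist):
# build the request body as a dict and let requests serialize it, rather than
# interpolating the base64 string into a hand-written JSON template.
def google_stt_json(audio, api_key):
    payload = {
        "config": {"encoding": "FLAC", "sampleRateHertz": 16000,
                   "languageCode": "cmn-Hant-TW"},
        "audio": {"content": base64.b64encode(audio).decode("utf-8")},
    }
    response = requests.post("https://speech.googleapis.com/v1/speech:recognize",
                             json=payload, params={"key": api_key})
    ans = response.json()
    text = ''
    for alt in ans.get('results', []):
        for item in alt['alternatives']:
            text += item['transcript']
    return text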


def t2s(text):
    file = "output2.mp3"
    url = "https://translate.google.com/translate_tts"
    querystring = {"ie": "UTF-8", "total": "1", "idx": "0",
                   "textlen": "128", "client": "tw-ob", "q": text, "tl": "zh-TW"}
    headers = {'cache-control': "no-cache"}
    response = requests.request(
        "GET", url, headers=headers, params=querystring, stream=True)
    with open(file, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    # subprocess.call('/usr/bin/play %s' % file, shell=True)
    subprocess.call(['/usr/bin/afplay', file])  # afplay is macOS-only; see the sketch below
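

# An optional cross-platform playback helper (an addition, not in the original
# gist): /usr/bin/afplay exists only on macOS, so this falls back to SoX's
# `play` elsewhere, assuming SoX is installed. t2s() could call play_file(file)
# instead of hard-coding afplay.
def play_file(path):
    import sys
    if sys.platform == 'darwin':
        subprocess.call(['/usr/bin/afplay', path])
    else:
        subprocess.call(['play', path])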


def talk():
    # starting recording
    while True:
        # try:
        audio = pyaudio.PyAudio()  # instantiate PyAudio
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
        frames = []
        status = 'ready'
        while True:
            data = stream.read(CHUNK)
            data_chunk = array('h', data)
            vol = max(data_chunk)
            print(vol)
            if vol >= THRESHOLD and status == 'ready':
                # first loud chunk: start keeping audio
                print("something said")
                frames.append(data)
                status = 'recording'
                print(status)
            elif status == 'ready':
                # still quiet: keep only the most recent chunk as pre-roll
                frames = [data]
            elif vol >= THRESHOLD and status == 'recording':
                frames.append(data)
            elif vol < THRESHOLD and status == 'recording':
                # volume dropped below the threshold: stop recording
                status = 'stop'
                frames.append(data)
                print(status)
                break
            else:
                print("nothing")
        print('save...\n')
        # pull the raw wave buffer into a numpy array
        decoded = numpy.frombuffer(b''.join(frames), dtype=numpy.int16)
        flac_buffer = io.BytesIO()
        # convert the numpy samples into FLAC bytes in memory
        sf.write(flac_buffer, decoded, samplerate=RATE,
                 format="FLAC", subtype="PCM_16")
        print('save...\n')
        google_text = google_stt(flac_buffer.getvalue())
        if google_text != '':
            t = ai.talk(google_text)
            # print(t)
            t2s(t)
        else:
            print("no response QQ")
        stream.stop_stream()
        stream.close()
        audio.terminate()
        # except:
        #     print('terminal...\n')
        #     break
if __name__ == "__main__":
main()