-
-
Save keike/7686256 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# encoding: utf-8
import os | |
import sys | |
import atexit | |
import json | |
import time | |
import tempfile | |
import wave | |
import traceback | |
import urllib2 | |
from subprocess import check_output | |
from Queue import Queue, Empty | |
import numpy as np | |
import pyaudio | |
class Spectrum(object):
    """Microphone listener that cuts utterances out of a live audio stream.

    Audio is captured through a PyAudio callback stream as mono float32
    frames of FRAME_SIZE samples at RATE Hz.  Each frame's spectrum is
    compared against the running average of recent frames; when the level
    stays above VOLUME_THRESHOLD and later falls back below it, the
    buffered frames are pushed onto the ``speak`` queue as a 2-D float32
    array of shape (n_frames, FRAME_SIZE).
    """

    FORMAT = pyaudio.paFloat32
    CHANNELS = 1
    FRAME_SIZE = 512
    RATE = 16000  # Hz

    # Level (in dB above the running average) that separates sound from
    # background, and the ``status`` values at which an utterance counts as
    # started (ONSET_STATUS) and finished (END_STATUS).
    VOLUME_THRESHOLD = 5
    ONSET_STATUS = 5
    END_STATUS = 15

    def frames(self, n):
        """Return the number of FRAME_SIZE-sample frames in ``n`` seconds."""
        return int(n * self.RATE / self.FRAME_SIZE)

    def __init__(self):
        """Open PyAudio and allocate the history buffers (no stream yet)."""
        self.speak = Queue()
        self.pa = pyaudio.PyAudio()
        self.last_samples = None
        atexit.register(self.pa.terminate)
        # Frequencies corresponding to the FFT result indices; not used
        # at the moment.
        # self.freq = np.fft.fftfreq(self.FRAME_SIZE, d=self.RATE**-1)
        # Slice of the shifted spectrum that is analysed.  Floor division
        # keeps the bounds integral on both Python 2 and Python 3.
        # NOTE(review): after fftshift this range presumably covers the
        # mirror of roughly 0-2 kHz at 16 kHz sampling -- confirm.
        self.begin = self.FRAME_SIZE * 3 // 8
        self.end = self.FRAME_SIZE // 2
        # Spectra of about the last second of frames (newest at index 0).
        self.fque = np.zeros((self.frames(1.0), self.end - self.begin),
                             np.float32)
        # Raw samples of about the last five seconds (newest at index -1).
        self.buff = np.zeros((self.frames(5.0), self.FRAME_SIZE), np.float32)
        # Run state; start() resets these, but defining them here keeps
        # stop() and callback() safe to call on a never-started instance.
        self.debug = False
        self.status = 0
        self.count = 0
        self.recording = pyaudio.paContinue
        self.stream = None

    def fft(self, samples):
        """Return the Hanning-windowed, shifted magnitude spectrum in dB.

        NOTE: a bin whose magnitude is exactly zero yields ``-inf``.
        """
        win = np.hanning(len(samples))
        res = np.fft.fftshift(np.fft.fft(win * samples))
        return 20 * np.log10(np.abs(res))

    def callback(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: buffer one frame and advance detection.

        Returns the ``(data, flag)`` tuple PyAudio expects, where ``flag``
        is the current value of ``self.recording``.
        """
        try:
            data = np.frombuffer(in_data, np.float32)
            # Store the frame in slot 0, then rotate so it becomes the
            # newest (last) row of the buffer.
            self.buff[0] = data
            self.buff = np.roll(self.buff, -1, axis=0)
            # Once cutting-out has started (status != 0) the spectrum
            # history is no longer shifted, which pauses the update of the
            # background average; slot 0 always holds the current frame.
            if self.status == 0:
                self.fque = np.roll(self.fque, 1, axis=0)
            self.fque[0] = self.fft(data)[self.begin:self.end]
            # Average spectrum of the background sound.
            average = np.average(self.fque, axis=0)
            # Current spectrum with the background average subtracted.
            values = self.fque[0] - average
            volume = np.average(values)

            # Count the frames that belong to the candidate utterance.
            if self.status:
                self.count += 1
            else:
                self.count = 0

            if self.status < self.ONSET_STATUS:
                # Waiting for consecutive loud frames.
                if volume > self.VOLUME_THRESHOLD:
                    self.status += 1
                else:
                    self.status = 0
            elif self.status == self.ONSET_STATUS:
                # Utterance in progress: wait for the first quiet frame.
                if volume < self.VOLUME_THRESHOLD:
                    self.status += 1
            elif self.status < self.END_STATUS:
                # Quiet frames move towards the end; loud frames move back.
                if volume < self.VOLUME_THRESHOLD:
                    self.status += 1
                else:
                    self.status -= 1
            else:
                # Utterance finished: hand over the buffered frames.  Copy
                # them because row 0 of self.buff is overwritten in place
                # by the next callback.
                self.status = 0
                self.speak.put(self.buff[-self.count - 2:].copy())

            if self.debug:
                pr = [min(9, max(0, int(v / 10))) for v in values]
                print('%s %s' % (''.join([str(i) for i in pr]), self.status))
            return (in_data, self.recording)
        except KeyboardInterrupt:
            self.recording = pyaudio.paAbort
            # PyAudio requires a (data, flag) tuple from every callback.
            return (in_data, self.recording)

    def start(self, debug=False):
        """Reset the detection state and start the input stream.

        :param debug: when true, print a per-frame level line from the
            callback.
        """
        self.debug = debug
        self.status = 0
        self.count = 0
        self.recording = pyaudio.paContinue
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=False,
                                   frames_per_buffer=self.FRAME_SIZE,
                                   stream_callback=self.callback)
        self.stream.start_stream()

    def stop(self):
        """Ask the callback to abort, wait for it, then close the stream."""
        self.recording = pyaudio.paAbort
        if self.stream is None:
            return
        # The callback returns paAbort on its next invocation, after which
        # the stream becomes inactive.
        while self.stream.is_active():
            time.sleep(0.5)
        self.stream.stop_stream()
        self.stream.close()
        self.stream = None
# Endpoint that receives the FLAC audio for Japanese speech recognition.
RECOGNIZE_URL = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=ja-JP"
# RECOGNIZE_URL += "&maxresult=10"  # With this, the top 10 candidates are returned.
# Name of the executable used to convert the WAV file to FLAC.
FLAC_TOOL = 'flac'
def recognize(fpath):
    """Send the FLAC file at ``fpath`` to RECOGNIZE_URL and return the reply.

    :param fpath: path of a FLAC file; it is posted with a
        ``rate=16000`` content type.
    :returns: the JSON response body decoded with ``json.loads``.
    """
    # Read the whole file and release the handle straight away.
    with open(fpath, "rb") as fp:
        flac = fp.read()
    header = {'Content-Type': 'audio/x-flac; rate=16000'}
    req = urllib2.Request(RECOGNIZE_URL, flac, header)
    response = urllib2.urlopen(req)
    try:
        return json.loads(response.read())
    finally:
        # Close the HTTP response even if decoding fails.
        response.close()
def main(spe):
    """Recognize every utterance queued on ``spe.speak`` until interrupted.

    Each queued float array is written to a temporary 16-bit mono 16 kHz
    WAV file, converted to FLAC with FLAC_TOOL, sent to ``recognize`` and
    the returned hypotheses are printed as ``confidence utterance``.

    :param spe: object whose ``speak`` attribute is a queue of float
        sample arrays.
    :raises SystemExit: with status 0 on KeyboardInterrupt.
    """
    while True:
        try:
            buff = spe.speak.get(timeout=3)
            with tempfile.NamedTemporaryFile(suffix='.wav') as fp:
                f = wave.open(fp, 'w')
                try:
                    f.setnchannels(1)
                    f.setsampwidth(2)
                    f.setframerate(16000)
                    # Clip before converting so a sample at +1.0 does not
                    # wrap around to the most negative int16 value.
                    pcm = np.clip(buff * 32768, -32768, 32767)
                    f.writeframes(pcm.astype(np.int16).tobytes())
                finally:
                    f.close()
                # flac writes its output next to the input, with the
                # extension replaced.
                output = os.path.splitext(fp.name)[0] + '.flac'
                try:
                    check_output([FLAC_TOOL, '-sf', fp.name])
                    res = recognize(output)
                finally:
                    # The .flac file is not managed by NamedTemporaryFile,
                    # so remove it explicitly.
                    if os.path.exists(output):
                        os.remove(output)
            for i in res.get('hypotheses', []):
                print('%s %s' % (i['confidence'], i['utterance']))
        except KeyboardInterrupt:
            raise SystemExit(0)
        except Empty:
            # Nothing was queued within the timeout; keep polling.
            pass
        except Exception:
            # Report the failure and back off before the next attempt.
            traceback.print_exc()
            time.sleep(5)
if __name__ == '__main__':
    # Start capturing without debug output, run the recognition loop, and
    # always shut the audio stream down on the way out.
    spe = Spectrum()
    spe.start(False)
    try:
        main(spe)
    finally:
        spe.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment