Created
July 28, 2015 03:22
-
-
Save maverickagm/56ee25f830ac4440cc70 to your computer and use it in GitHub Desktop.
wit.ai backend patch for speech_recognition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/__init__.py 2015-07-27 20:12:31.047928942 -0700 | |
+++ b/__init__.py 2015-07-27 20:11:29.387931799 -0700 | |
@@ -140,6 +140,23 @@ | |
self.dynamic_energy_ratio = 1.5 | |
self.pause_threshold = 0.8 # seconds of quiet time before a phrase is considered complete | |
self.quiet_duration = 0.5 # amount of quiet time to keep on both sides of the recording | |
+ self.stt_engine = 'google' | |
+ | |
+    def samples_to_wav(self, source, frame_data):
+        """Wrap the raw audio frames ``frame_data`` in a WAV container, using the sample width, channel count and rate of ``source``, and return the WAV bytes."""
+        assert isinstance(source, AudioSource), "Source must be an audio source"
+        with io.BytesIO() as wav_file:
+            wav_writer = wave.open(wav_file, "wb")
+            try: # the wave writer cannot be used as a context manager on Python 2, so close it by hand
+                wav_writer.setsampwidth(source.SAMPLE_WIDTH)
+                wav_writer.setnchannels(source.CHANNELS)
+                wav_writer.setframerate(source.RATE)
+                wav_writer.writeframes(frame_data)
+            finally: # make sure resources are cleaned up
+                wav_writer.close()
+            wav_data = wav_file.getvalue() # read after the writer is closed, but before the buffer itself is closed
+        return wav_data
+ | |
def samples_to_flac(self, source, frame_data): | |
assert isinstance(source, AudioSource), "Source must be an audio source" | |
@@ -209,7 +226,11 @@ | |
frame_data = frames.getvalue() | |
frames.close() | |
- return AudioData(source.RATE, self.samples_to_flac(source, frame_data)) | |
+ if self.stt_engine == 'wit': | |
+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data)) | |
+ else: | |
+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data)) | |
+ | |
def adjust_for_ambient_noise(self, source, duration = 1): | |
""" | |
@@ -299,9 +320,39 @@ | |
for i in range(quiet_buffer_count, pause_count): frames.pop() # remove extra quiet frames at the end | |
frame_data = b"".join(list(frames)) | |
- return AudioData(source.RATE, self.samples_to_flac(source, frame_data)) | |
+ if self.stt_engine == 'wit': | |
+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data)) | |
+ else: | |
+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data)) | |
def recognize(self, audio_data, show_all = False): | |
+ if self.stt_engine == 'wit': | |
+ text = self.recognize_wit(audio_data, show_all) | |
+ return text | |
+ else: | |
+ text = self.recognize_google(audio_data, show_all) | |
+ return text | |
+ | |
+    def recognize_wit(self, audio_data, show_all = False):
+        """Send ``audio_data`` (an ``AudioData`` instance holding WAV bytes) to the wit.ai speech API; return the parsed JSON if ``show_all``, else its ``_text`` field (``None`` if absent). Raises ``IndexError``/``KeyError`` when the request fails."""
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+        url = 'https://api.wit.ai/speech?v=20141022'
+        self.request = Request(url, data = audio_data.data, headers = {'Authorization': 'Bearer %s' % (self.key), 'Content-Type': 'audio/wav'})
+        try:
+            response = urlopen(self.request)
+        except URLError: # NOTE(review): HTTPError subclasses URLError, so HTTP failures (e.g. a rejected key) presumably land here too - confirm
+            raise IndexError("No internet connection available to transfer audio data")
+        except Exception: # not a bare except, so KeyboardInterrupt/SystemExit still propagate
+            raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)")
+        try:
+            response_text = response.read().decode("utf-8")
+        finally: # release the connection even if reading or decoding fails
+            response.close()
+        result = json.loads(response_text)
+
+        return result if show_all else result.get('_text')
+ | |
+ def recognize_google(self, audio_data, show_all = False): | |
""" | |
Performs speech recognition, using the Google Speech Recognition API, on ``audio_data`` (an ``AudioData`` instance). | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment