Skip to content

Instantly share code, notes, and snippets.

@maverickagm
Created July 28, 2015 03:22
Show Gist options
  • Save maverickagm/56ee25f830ac4440cc70 to your computer and use it in GitHub Desktop.
Save maverickagm/56ee25f830ac4440cc70 to your computer and use it in GitHub Desktop.
wit.ai backend patch for speech_recognition
--- a/__init__.py 2015-07-27 20:12:31.047928942 -0700
+++ b/__init__.py 2015-07-27 20:11:29.387931799 -0700
@@ -140,6 +140,23 @@
self.dynamic_energy_ratio = 1.5
self.pause_threshold = 0.8 # seconds of quiet time before a phrase is considered complete
self.quiet_duration = 0.5 # amount of quiet time to keep on both sides of the recording
+ self.stt_engine = 'google'
+
+ def samples_to_wav(self, source, frame_data):
+     """Returns ``frame_data`` (raw samples recorded from ``source``, an ``AudioSource``) packed into an in-memory WAV file, as bytes."""
+     assert isinstance(source, AudioSource), "Source must be an audio source"
+     with io.BytesIO() as wav_file:
+         wav_writer = wave.open(wav_file, "wb")
+         try: # note that we can't use context manager due to Python 2 not supporting it
+             wav_writer.setsampwidth(source.SAMPLE_WIDTH)
+             wav_writer.setnchannels(source.CHANNELS)
+             wav_writer.setframerate(source.RATE)
+             wav_writer.writeframes(frame_data)
+         finally: # make sure resources are cleaned up
+             wav_writer.close()
+         # read the buffer while it is still open; leaving the ``with`` block closes it
+         return wav_file.getvalue()
+
def samples_to_flac(self, source, frame_data):
assert isinstance(source, AudioSource), "Source must be an audio source"
@@ -209,7 +226,11 @@
frame_data = frames.getvalue()
frames.close()
- return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+ if self.stt_engine == 'wit':
+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
+ else:
+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+
def adjust_for_ambient_noise(self, source, duration = 1):
"""
@@ -299,9 +320,39 @@
for i in range(quiet_buffer_count, pause_count): frames.pop() # remove extra quiet frames at the end
frame_data = b"".join(list(frames))
- return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+ if self.stt_engine == 'wit':
+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
+ else:
+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
def recognize(self, audio_data, show_all = False):
+     """
+     Performs speech recognition on ``audio_data`` using the backend selected by ``self.stt_engine``.
+     Calls ``recognize_wit`` when the engine is ``'wit'`` and ``recognize_google`` otherwise; ``show_all`` is passed through and the backend's result is returned as-is.
+     """
+     backend = self.recognize_wit if self.stt_engine == 'wit' else self.recognize_google
+     return backend(audio_data, show_all)
+
+ def recognize_wit(self, audio_data, show_all = False):
+     """
+     Performs speech recognition, using the wit.ai speech API, on ``audio_data`` (an ``AudioData`` instance); the data is posted as ``audio/wav`` with ``self.key`` as the bearer token.
+     Returns the decoded JSON response if ``show_all`` is true, otherwise its ``_text`` entry (``None`` when absent). Raises ``IndexError`` if ``urlopen`` fails with ``URLError`` and ``KeyError`` on any other request error.
+     """
+     assert isinstance(audio_data, AudioData), "Data must be audio data"
+     url = 'https://api.wit.ai/speech?v=20141022'
+     self.request = Request(url, data = audio_data.data, headers = {'Authorization': 'Bearer %s' % (self.key), 'Content-Type': 'audio/wav'})
+     try:
+         response = urlopen(self.request)
+     except URLError: # NOTE(review): urllib's HTTPError is a URLError subclass, so HTTP error responses (e.g. a rejected key) also land here - confirm this is intended
+         raise IndexError("No internet connection available to transfer audio data")
+     except Exception: # not a bare except, so KeyboardInterrupt/SystemExit are not turned into KeyError
+         raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)")
+     result = json.loads(response.read().decode("utf-8"))
+
+     # the transcription is stored under the '_text' key of the response
+     return result if show_all else result.get('_text')
+
+ def recognize_google(self, audio_data, show_all = False):
"""
Performs speech recognition, using the Google Speech Recognition API, on ``audio_data`` (an ``AudioData`` instance).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment