maverickagm/wit.patch

## wit.patch
--- a/__init__.py	2015-07-27 20:12:31.047928942 -0700
+++ b/__init__.py	2015-07-27 20:11:29.387931799 -0700
@@ -140,6 +140,23 @@
         self.dynamic_energy_ratio = 1.5
         self.pause_threshold = 0.8 # seconds of quiet time before a phrase is considered complete
         self.quiet_duration = 0.5 # amount of quiet time to keep on both sides of the recording
+        self.stt_engine = 'google'
+
+    def samples_to_wav(self, source, frame_data):
+        """Returns ``frame_data`` as the bytes of a WAV file, using the sample width, channel count, and rate of ``source``."""
+        assert isinstance(source, AudioSource), "Source must be an audio source"
+        with io.BytesIO() as wav_file:
+            wav_writer = wave.open(wav_file, "wb")
+            try: # note that we can't use context manager due to Python 2 not supporting it
+                wav_writer.setsampwidth(source.SAMPLE_WIDTH)
+                wav_writer.setnchannels(source.CHANNELS)
+                wav_writer.setframerate(source.RATE)
+                wav_writer.writeframes(frame_data)
+            finally:  # make sure resources are cleaned up
+                wav_writer.close()
+            wav_data = wav_file.getvalue() # read the buffer before the ``with`` block closes it
+        return wav_data
+

     def samples_to_flac(self, source, frame_data):
         assert isinstance(source, AudioSource), "Source must be an audio source"
@@ -209,7 +226,11 @@

         frame_data = frames.getvalue()
         frames.close()
-        return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+        if self.stt_engine == 'wit':
+            return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
+        else:
+            return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+

     def adjust_for_ambient_noise(self, source, duration = 1):
         """
@@ -299,9 +320,39 @@
         for i in range(quiet_buffer_count, pause_count): frames.pop() # remove extra quiet frames at the end
         frame_data = b"".join(list(frames))

-        return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
+        if self.stt_engine == 'wit':
+            return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
+        else:
+            return AudioData(source.RATE, self.samples_to_flac(source, frame_data))

     def recognize(self, audio_data, show_all = False):
+        """Performs speech recognition on ``audio_data`` with the engine named by ``self.stt_engine``, passing ``show_all`` through."""
+        if self.stt_engine == 'wit':
+            recognize_with = self.recognize_wit
+        else: # any other engine name falls back to Google
+            recognize_with = self.recognize_google
+        return recognize_with(audio_data, show_all)
+
+    def recognize_wit(self, audio_data, show_all = False):
+        """
+        Performs speech recognition, using the Wit.ai speech API, on ``audio_data`` (an ``AudioData`` instance), authenticating with ``self.key``. Returns the parsed JSON response if ``show_all`` is true, otherwise its ``_text`` entry. Raises ``IndexError`` if the request fails with ``URLError`` and ``KeyError`` if it fails with any other exception.
+        """
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+        url = 'https://api.wit.ai/speech?v=20141022'
+        self.request = Request(url, data = audio_data.data, headers = {'Authorization': 'Bearer %s' % (self.key), 'Content-Type': 'audio/wav'})
+        try:
+            response = urlopen(self.request)
+        except URLError: # NOTE(review): HTTPError subclasses URLError, so HTTP error responses are reported here too
+            raise IndexError("No internet connection available to transfer audio data")
+        except Exception: # not a bare ``except:``, which would also convert KeyboardInterrupt/SystemExit
+            raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)")
+        result = json.loads(response.read().decode("utf-8"))
+
+        if show_all:
+            return result
+        return result.get('_text')
+
+    def recognize_google(self, audio_data, show_all = False):
         """
         Performs speech recognition, using the Google Speech Recognition API, on ``audio_data`` (an ``AudioData`` instance).
	--- a/__init__.py 2015-07-27 20:12:31.047928942 -0700
	+++ b/__init__.py 2015-07-27 20:11:29.387931799 -0700
	@@ -140,6 +140,23 @@
	self.dynamic_energy_ratio = 1.5
	self.pause_threshold = 0.8 # seconds of quiet time before a phrase is considered complete
	self.quiet_duration = 0.5 # amount of quiet time to keep on both sides of the recording
	+ self.stt_engine = 'google'
	+
	+    def samples_to_wav(self, source, frame_data):
	+        """Returns ``frame_data`` as the bytes of a WAV file, using the sample width, channel count, and rate of ``source``."""
	+        assert isinstance(source, AudioSource), "Source must be an audio source"
	+        with io.BytesIO() as wav_file:
	+            wav_writer = wave.open(wav_file, "wb")
	+            try: # note that we can't use context manager due to Python 2 not supporting it
	+                wav_writer.setsampwidth(source.SAMPLE_WIDTH)
	+                wav_writer.setnchannels(source.CHANNELS)
	+                wav_writer.setframerate(source.RATE)
	+                wav_writer.writeframes(frame_data)
	+            finally: # make sure resources are cleaned up
	+                wav_writer.close()
	+            wav_data = wav_file.getvalue() # read the buffer before the ``with`` block closes it
	+        return wav_data
	+

	def samples_to_flac(self, source, frame_data):
	assert isinstance(source, AudioSource), "Source must be an audio source"
	@@ -209,7 +226,11 @@

	frame_data = frames.getvalue()
	frames.close()
	- return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
	+ if self.stt_engine == 'wit':
	+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
	+ else:
	+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
	+

	def adjust_for_ambient_noise(self, source, duration = 1):
	"""
	@@ -299,9 +320,39 @@
	for i in range(quiet_buffer_count, pause_count): frames.pop() # remove extra quiet frames at the end
	frame_data = b"".join(list(frames))

	- return AudioData(source.RATE, self.samples_to_flac(source, frame_data))
	+ if self.stt_engine == 'wit':
	+ return AudioData(source.RATE, self.samples_to_wav(source, frame_data))
	+ else:
	+ return AudioData(source.RATE, self.samples_to_flac(source, frame_data))

	def recognize(self, audio_data, show_all = False):
	+        """Performs speech recognition on ``audio_data`` with the engine named by ``self.stt_engine``, passing ``show_all`` through."""
	+        if self.stt_engine == 'wit':
	+            recognize_with = self.recognize_wit
	+        else: # any other engine name falls back to Google
	+            recognize_with = self.recognize_google
	+        return recognize_with(audio_data, show_all)
	+
	+    def recognize_wit(self, audio_data, show_all = False):
	+        """
	+        Performs speech recognition, using the Wit.ai speech API, on ``audio_data`` (an ``AudioData`` instance), authenticating with ``self.key``. Returns the parsed JSON response if ``show_all`` is true, otherwise its ``_text`` entry. Raises ``IndexError`` if the request fails with ``URLError`` and ``KeyError`` if it fails with any other exception.
	+        """
	+        assert isinstance(audio_data, AudioData), "Data must be audio data"
	+        url = 'https://api.wit.ai/speech?v=20141022'
	+        self.request = Request(url, data = audio_data.data, headers = {'Authorization': 'Bearer %s' % (self.key), 'Content-Type': 'audio/wav'})
	+        try:
	+            response = urlopen(self.request)
	+        except URLError: # NOTE(review): HTTPError subclasses URLError, so HTTP error responses are reported here too
	+            raise IndexError("No internet connection available to transfer audio data")
	+        except Exception: # not a bare ``except:``, which would also convert KeyboardInterrupt/SystemExit
	+            raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)")
	+        result = json.loads(response.read().decode("utf-8"))
	+
	+        if show_all:
	+            return result
	+        return result.get('_text')
	+
	+ def recognize_google(self, audio_data, show_all = False):
	"""
	Performs speech recognition, using the Google Speech Recognition API, on ``audio_data`` (an ``AudioData`` instance).