Magic Mirror weather implementation using TensorFlow and Google Cloud Speech API
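
A local TensorFlow speech commands model (conv_actions_frozen.pb) runs continuously on the Raspberry Pi and listens for the "go" hotword; once triggered, the Google Cloud Speech API transcribes the spoken request, and a request containing "weather" shows the Magic Mirror weather modules for a few seconds via the MMM-Remote-Control HTTP API.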
#!/usr/bin/env python3

from collections import deque
import threading
import requests
import time

import numpy as np
import tensorflow as tf
# This import is kept so the audio decoding ops used by the frozen
# speech-commands graph are registered; the name itself is unused.
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio  # noqa: F401

import aiy.audio
import aiy.cloudspeech

threads = []


class TensorPredictions(object):
    """A timestamped set of softmax predictions from one model run."""

    def __init__(self, time, predictions):
        self.time_ = time
        self.predictions_ = predictions

    def time(self):
        return self.time_

    def predictions(self):
        return self.predictions_
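

# The detection logic below follows the RecognizeCommands smoothing approach
# from TensorFlow's speech_commands example: raw predictions are averaged over
# a short window, and a label only triggers once its averaged score clears a
# threshold and enough time has passed since the last trigger.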
class TensorProcessor(object):
    """A processor that identifies spoken commands from the stream."""

    def __init__(self):
        self.input_samples_name_ = "decoded_sample_data:0"
        self.input_rate_name_ = "decoded_sample_data:1"
        self.output_name_ = "labels_softmax:0"
        self.average_window_duration_ms_ = 500
        self.detection_threshold_ = 0.7
        self.suppression_ms_ = 1500
        self.minimum_count_ = 2
        self.sample_rate_ = 16000
        self.sample_duration_ms_ = 1000
        self.previous_top_label_ = '_silence_'
        self.previous_top_label_time_ = 0
        # One second of audio at 16 kHz, kept as a circular buffer.
        self.recording_length_ = int(
            (self.sample_rate_ * self.sample_duration_ms_) / 1000)
        self.recording_buffer_ = np.zeros(
            [self.recording_length_], dtype=np.float32)
        self.recording_offset_ = 0
        self.sess_ = tf.Session()
        self._load_graph("/home/pi/conv_actions_frozen.pb")
        self.labels_ = self._load_labels("/home/pi/conv_actions_labels.txt")
        self.labels_count_ = len(self.labels_)
        self.previous_results_ = deque()
        self.triggered = 0

    def _load_graph(self, filename):
        """Unpersists graph from file as default graph."""
        with tf.gfile.FastGFile(filename, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')

    def _load_labels(self, filename):
        """Read in labels, one label per line."""
        return [line.rstrip() for line in tf.gfile.GFile(filename)]
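
    # The stock conv_actions_labels.txt from the TensorFlow speech commands
    # tutorial lists _silence_, _unknown_, yes, no, up, down, left, right, on,
    # off, stop and go; "go" is the hotword this script watches for.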

    def add_data(self, data_bytes):
        """Process a chunk of audio data from the recorder."""
        if not data_bytes:
            return
        data = np.frombuffer(data_bytes, dtype=np.int16)
        current_time_ms = int(round(time.time() * 1000))
        number_read = len(data)
        # Copy the new samples into the circular buffer (wrapping around if
        # necessary) and scale from int16 to floats in [-1.0, 1.0].
        new_recording_offset = self.recording_offset_ + number_read
        second_copy_length = max(0, new_recording_offset - self.recording_length_)
        first_copy_length = number_read - second_copy_length
        self.recording_buffer_[self.recording_offset_:(
            self.recording_offset_ + first_copy_length
        )] = data[:first_copy_length].astype(np.float32) * (1 / 32767.0)
        self.recording_buffer_[:second_copy_length] = data[
            first_copy_length:].astype(np.float32) * (1 / 32767.0)
        self.recording_offset_ = new_recording_offset % self.recording_length_
        # Unroll the circular buffer into a linear one-second window and run
        # the frozen model over it.
        input_data = np.concatenate(
            (self.recording_buffer_[self.recording_offset_:],
             self.recording_buffer_[:self.recording_offset_]))
        input_data = input_data.reshape([self.recording_length_, 1])
        softmax_tensor = self.sess_.graph.get_tensor_by_name(self.output_name_)
        predictions, = self.sess_.run(softmax_tensor, {
            self.input_samples_name_: input_data,
            self.input_rate_name_: self.sample_rate_
        })
        if self.previous_results_ and current_time_ms < self.previous_results_[0].time():
            raise RuntimeError(
                'You must feed results in increasing time order, but received '
                'a timestamp of %d that was earlier than the previous one of '
                '%d' % (current_time_ms, self.previous_results_[0].time()))
        self.previous_results_.append(
            TensorPredictions(current_time_ms, predictions))
        # Prune any earlier results that are too old for the averaging window.
        time_limit = current_time_ms - self.average_window_duration_ms_
        while self.previous_results_[0].time() < time_limit:
            self.previous_results_.popleft()
        # If there are too few results, assume the result will be unreliable
        # and bail.
        how_many_results = len(self.previous_results_)
        earliest_time = self.previous_results_[0].time()
        samples_duration = current_time_ms - earliest_time
        if how_many_results < self.minimum_count_ or samples_duration < (
                self.average_window_duration_ms_ / 4):
            return
        # Calculate the average score across all the results in the window.
        average_scores = np.zeros([self.labels_count_])
        for result in self.previous_results_:
            average_scores += result.predictions() * (1.0 / how_many_results)
        # See if the highest averaged score is enough to trigger a detection.
        current_top_index = average_scores.argmax()
        current_top_label = self.labels_[current_top_index]
        current_top_score = average_scores[current_top_index]
        # If we've recently had another label trigger, assume one that occurs
        # too soon afterwards is a bad result.
        if self.previous_top_label_ == '_silence_' or self.previous_top_label_time_ == 0:
            time_since_last_top = 1000000
            self.triggered = 0
        else:
            time_since_last_top = current_time_ms - self.previous_top_label_time_
        if (current_top_score > self.detection_threshold_ and
                time_since_last_top > self.suppression_ms_):
            self.previous_top_label_ = current_top_label
            self.previous_top_label_time_ = current_time_ms
            print(current_top_label)
            if current_top_label == "go":
                print('event here')
                self.triggered = 1
            else:
                self.triggered = 0
        else:
            self.triggered = 0

    def have_hotword(self):
        return bool(self.triggered)

    def is_done(self):
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass
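

# A minimal sketch for exercising the detector offline, assuming a 16 kHz
# mono 16-bit WAV at the hypothetical path /home/pi/test.wav:
#
#   import wave
#   processor = TensorProcessor()
#   with wave.open('/home/pi/test.wav', 'rb') as wav:
#       chunk = wav.readframes(1600)  # 100 ms of audio at 16 kHz
#       while chunk:
#           processor.add_data(chunk)
#           if processor.have_hotword():
#               print('hotword detected')
#           chunk = wav.readframes(1600)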


def show_weather():
    # aiy.audio.say('The weather today')
    requests.get('http://127.0.0.1:8080/remote?action=SHOW&module=module_2_currentweather')
    requests.get('http://127.0.0.1:8080/remote?action=SHOW&module=module_3_weatherforecast')
    # Hide the weather again after a short delay without blocking detection.
    task = threading.Thread(target=hide_weather)
    threads.append(task)
    task.start()


def hide_weather():
    time.sleep(5)
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_2_currentweather')
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_3_weatherforecast')
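
# The show/hide requests use the MMM-Remote-Control module's HTTP GET API;
# the module identifiers (module_2_currentweather and so on) depend on the
# order of the modules in your own Magic Mirror config, so adjust to match.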


def play_tinkle():
    aiy.audio.play_wave("/home/pi/tinkle.wav")


def main():
    # Start with all of the Magic Mirror modules hidden.
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_0_clock')
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_1_MMM-Remote-Control')
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_2_currentweather')
    requests.get('http://127.0.0.1:8080/remote?action=HIDE&module=module_3_weatherforecast')
    processor = TensorProcessor()
    recognizer = aiy.cloudspeech.get_recognizer()
    recognizer.expect_phrase('weather')
    aiy.audio.get_recorder().add_processor(processor)
    aiy.audio.get_recorder().start()
    while True:
        if not processor.have_hotword():
            time.sleep(0.05)  # avoid a tight busy-wait while polling
            continue
        print('Listening...')
        text = recognizer.recognize()
        print('Sending to cloud.')
        if text is None:
            print('Sorry, I did not hear you.')
        else:
            print('You said "%s"' % text)
            if 'weather' in text:
                task = threading.Thread(target=play_tinkle)
                threads.append(task)
                task.start()
                show_weather()


if __name__ == '__main__':
    main()
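
Before running this, Magic Mirror with the MMM-Remote-Control module needs to be serving on 127.0.0.1:8080 of the same Pi, the AIY kit's Cloud Speech credentials need to be configured, and the frozen model and label files (conv_actions_frozen.pb and conv_actions_labels.txt, as published with TensorFlow's speech commands tutorial) need to be present under /home/pi along with the tinkle.wav prompt sound.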