Skip to content

Instantly share code, notes, and snippets.

@aallan
Created November 11, 2017 04:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save aallan/3d8fb2b2a396372c5b84cb8892b1ce3a to your computer and use it in GitHub Desktop.
Magic Mirror weather implementation using TensorFlow and Google Cloud Speech API
#!/usr/bin/env python3
from collections import deque
import threading
import requests
import time
import wave
import tensorflow as tf
import numpy as np
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio
import aiy.audio
import aiy.cloudspeech
import aiy.voicehat
threads = []
class TensorPredictions(object):
    """A timestamped snapshot of model prediction scores.

    Instances are queued by TensorProcessor so that scores can be
    averaged over a short trailing time window.
    """

    def __init__(self, time, predictions):
        # Store the capture time (milliseconds) and the raw score vector.
        self.time_ = time
        self.predictions_ = predictions

    def time(self):
        """Return the timestamp (ms) at which the scores were captured."""
        return self.time_

    def predictions(self):
        """Return the stored prediction scores."""
        return self.predictions_
class TensorProcessor(object):
    """A processor that identifies spoken commands from the stream.

    Feeds 16-bit PCM audio chunks into a frozen TensorFlow
    speech-commands graph, averages per-label softmax scores over a
    short trailing window, and sets ``self.triggered`` when the label
    "go" wins with enough confidence.
    """

    def __init__(self):
        # Tensor names inside the frozen graph.
        self.input_samples_name_ = "decoded_sample_data:0"
        self.input_rate_name_ = "decoded_sample_data:1"
        self.output_name_ = "labels_softmax:0"
        # Smoothing / debouncing parameters (times in milliseconds).
        self.average_window_duration_ms_ = 500
        self.detection_threshold_ = 0.7
        self.suppression_ms_ = 1500
        self.minimum_count_ = 2
        self.sample_rate_ = 16000
        self.sample_duration_ms_ = 1000
        self.previous_top_label_ = '_silence_'
        self.previous_top_label_time_ = 0
        # Circular buffer holding the most recent second of audio as
        # float32 samples scaled to [-1, 1].
        # (Was a hard-coded 16000*1000; derive from the attributes above.)
        self.recording_length_ = int(
            (self.sample_rate_ * self.sample_duration_ms_) / 1000)
        self.recording_buffer_ = np.zeros(
            [self.recording_length_], dtype=np.float32)
        self.recording_offset_ = 0
        self.sess_ = tf.Session()
        self._load_graph("/home/pi/conv_actions_frozen.pb")
        self.labels_ = self._load_labels("/home/pi/conv_actions_labels.txt")
        self.labels_count_ = len(self.labels_)
        self.previous_results_ = deque()
        self.triggered = 0

    def _load_graph(self, filename):
        """Unpersists graph from file as default graph."""
        with tf.gfile.FastGFile(filename, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')

    def _load_labels(self, filename):
        """Read in labels, one label per line."""
        return [line.rstrip() for line in tf.gfile.GFile(filename)]

    def add_data(self, data_bytes):
        """Process one chunk of little-endian int16 audio data.

        Copies the samples into the circular buffer, runs the model on
        the last second of audio, and updates ``self.triggered``.

        Raises:
            RuntimeError: if chunks arrive out of increasing time order.
        """
        if not data_bytes:
            return
        data = np.frombuffer(data_bytes, dtype=np.int16)
        current_time_ms = int(round(time.time() * 1000))
        number_read = len(data)
        # Write into the circular buffer, wrapping past the end if needed.
        new_recording_offset = self.recording_offset_ + number_read
        second_copy_length = max(0, new_recording_offset - self.recording_length_)
        first_copy_length = number_read - second_copy_length
        self.recording_buffer_[self.recording_offset_:(
            self.recording_offset_ + first_copy_length
        )] = data[:first_copy_length].astype(np.float32) * (1 / 32767.0)
        self.recording_buffer_[:second_copy_length] = data[
            first_copy_length:].astype(np.float32) * (1 / 32767.0)
        self.recording_offset_ = new_recording_offset % self.recording_length_
        # Unroll the circular buffer into chronological order for the model.
        input_data = np.concatenate(
            (self.recording_buffer_[self.recording_offset_:],
             self.recording_buffer_[:self.recording_offset_]))
        input_data = input_data.reshape([self.recording_length_, 1])
        softmax_tensor = self.sess_.graph.get_tensor_by_name(self.output_name_)
        predictions, = self.sess_.run(softmax_tensor, {
            self.input_samples_name_: input_data,
            self.input_rate_name_: self.sample_rate_
        })
        if self.previous_results_ and current_time_ms < self.previous_results_[0].time():
            # BUG FIX: the original raised ``RuntimeException``, which is
            # not a Python name (it would itself crash with NameError);
            # use RuntimeError with a properly formatted message.
            raise RuntimeError(
                'You must feed results in increasing time order, but '
                'received a timestamp of {} that was earlier than the '
                'previous one of {}'.format(
                    current_time_ms, self.previous_results_[0].time()))
        self.previous_results_.append(
            TensorPredictions(current_time_ms, predictions))
        # Prune any earlier results that are too old for the averaging window.
        time_limit = current_time_ms - self.average_window_duration_ms_
        while self.previous_results_[0].time() < time_limit:
            self.previous_results_.popleft()
        # If there are too few results, assume the result will be
        # unreliable and bail.
        how_many_results = len(self.previous_results_)
        earliest_time = self.previous_results_[0].time()
        samples_duration = current_time_ms - earliest_time
        if how_many_results < self.minimum_count_ or samples_duration < (
                self.average_window_duration_ms_ / 4):
            return
        # Calculate the average score across all the results in the window.
        average_scores = np.zeros([self.labels_count_])
        for result in self.previous_results_:
            average_scores += result.predictions() * (1.0 / how_many_results)
        # Pick the label with the highest averaged score (the original's
        # argsort()[-1:][::-1] was an obfuscated argmax).
        current_top_index = int(average_scores.argmax())
        current_top_label = self.labels_[current_top_index]
        current_top_score = average_scores[current_top_index]
        # If we've recently had another label trigger, assume one that
        # occurs too soon afterwards is a bad result.
        if self.previous_top_label_ == '_silence_' or self.previous_top_label_time_ == 0:
            time_since_last_top = 1000000
            self.triggered = 0
        else:
            time_since_last_top = current_time_ms - self.previous_top_label_time_
        if (current_top_score > self.detection_threshold_
                and time_since_last_top > self.suppression_ms_):
            self.previous_top_label_ = current_top_label
            self.previous_top_label_time_ = current_time_ms
            print(current_top_label)
            if current_top_label == "go":
                print('event here')
                self.triggered = 1
            else:
                self.triggered = 0
        else:
            self.triggered = 0

    def have_hotword(self):
        """Return True if the most recent detection was the "go" hotword."""
        return bool(self.triggered)

    def is_done(self):
        """The processor never finishes on its own."""
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # BUG FIX: __exit__ must accept the three exception arguments;
        # the original one-argument signature would raise TypeError on
        # exiting any ``with`` block using this processor.
        pass
def show_weather():
    """Show the Magic Mirror weather modules, then schedule them to hide.

    Fires two remote-control requests at the local Magic Mirror server
    and starts a background thread that hides the modules again after a
    delay, so the caller is not blocked.
    """
    # Timeouts keep the voice loop from hanging forever if the mirror's
    # web server is down (the original requests.get had no timeout).
    requests.get(
        'http://127.0.0.1:8080/remote?action=SHOW&module=module_2_currentweather',
        timeout=10)
    requests.get(
        'http://127.0.0.1:8080/remote?action=SHOW&module=module_3_weatherforecast',
        timeout=10)
    task = threading.Thread(target=hide_weather)
    threads.append(task)
    task.start()
def hide_weather():
    """Hide the Magic Mirror weather modules after a 5-second delay.

    Intended to run on a background thread started by show_weather().
    """
    time.sleep(5)
    # Timeouts keep the hide thread from hanging forever if the mirror's
    # web server is down (the original requests.get had no timeout).
    requests.get(
        'http://127.0.0.1:8080/remote?action=HIDE&module=module_2_currentweather',
        timeout=10)
    requests.get(
        'http://127.0.0.1:8080/remote?action=HIDE&module=module_3_weatherforecast',
        timeout=10)
def play_tinkle():
    """Play the notification chime WAV over the Voice HAT speaker."""
    aiy.audio.play_wave("/home/pi/tinkle.wav")
def _hide_module(module):
    """Ask the Magic Mirror remote-control API to hide one module."""
    # Timeout keeps startup from hanging if the mirror server is down.
    requests.get(
        'http://127.0.0.1:8080/remote?action=HIDE&module=' + module,
        timeout=10)


def main():
    """Blank the mirror, then loop handling the "go" hotword.

    When the TensorFlow processor reports the hotword, record a phrase,
    send it to the Cloud Speech API, and show the weather if the
    transcript mentions 'weather'.
    """
    # Start with a blank mirror (the original repeated four identical
    # requests.get calls with unused return values and no timeout).
    for module in ('module_0_clock',
                   'module_1_MMM-Remote-Control',
                   'module_2_currentweather',
                   'module_3_weatherforecast'):
        _hide_module(module)
    processor = TensorProcessor()
    recognizer = aiy.cloudspeech.get_recognizer()
    recognizer.expect_phrase('weather')
    aiy.audio.get_recorder().add_processor(processor)
    aiy.audio.get_recorder().start()
    while True:
        if not processor.have_hotword():
            # Sleep briefly so this poll loop doesn't spin a Raspberry Pi
            # core at 100% CPU (the original busy-waited with `continue`).
            time.sleep(0.05)
            continue
        print('Listening...')
        text = recognizer.recognize()
        print('Sending to cloud.')
        if text is None:
            print('Sorry, I did not hear you.')
        else:
            print('You said "', text, '"')
            if 'weather' in text:
                task = threading.Thread(target=play_tinkle)
                threads.append(task)
                task.start()
                show_weather()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment