# Last active December 11, 2015 18:08
#!/usr/bin/env python
# coding: utf-8
import json
import urllib2
from os import system, popen
# Round-trip demo: synthesize a phrase with the 'say' command, convert it to
# FLAC with ffmpeg, play it back, then POST the FLAC bytes to a
# speech-to-text HTTP endpoint and print the first recognized utterance.

# generate speech audio file with 'say' in osx and ffmpeg
words = 'Hello world.'
system('say "{}" -o say.aiff'.format(words))
# -y lets ffmpeg overwrite a say.flac left over from an earlier run
system('ffmpeg -i say.aiff say.flac -loglevel quiet -y')
# play the converted file back; -autoexit makes ffplay quit when it finishes
system('ffplay say.flac -autoexit -v quiet -showmode 0')

# get sample rate: grep keeps the sample_rate line of the ffprobe output and
# 'cut -c13-' keeps what follows its 12-character 'sample_rate=' prefix
probe = popen('ffprobe -i say.flac -v quiet -show_streams | grep sample_rate | cut -c13-')
try:
    rate = probe.read().strip()
finally:
    probe.close()

# speech to text with google speech api
with open('say.flac', 'rb') as flac_file:
    audio = flac_file.read()

# NOTE(review): the endpoint URL is empty in this copy of the script, so
# urlopen() cannot succeed until the real speech API URL is filled in here.
url = ""
# passing the audio as 'data' makes this a POST carrying the FLAC bytes
request = urllib2.Request(url, data=audio)
request.add_header('Content-type', 'audio/x-flac; rate={}'.format(rate))
response = urllib2.urlopen(request)
try:
    content = json.loads(response.read())
finally:
    response.close()

# a status of 0 with at least one hypothesis is treated as success; a missing
# or empty hypothesis list is reported as an error instead of raising
hypotheses = content.get('hypotheses')
if content.get('status') == 0 and hypotheses:
    print('"{}"'.format(hypotheses[0]['utterance']))
else:
    print('Error')
