Skip to content

Instantly share code, notes, and snippets.

@jwheat
Created May 9, 2020 20:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save jwheat/87d3b5f795ed921957669a7eaae29006 to your computer and use it in GitHub Desktop.
Rasa voice-to-text test script for DeepSpeech 0.6.1
import pyaudio
from deepspeech import Model
import scipy.io.wavfile as wav
import wave
WAVE_OUTPUT_FILENAME = "test_audio.wav"
def record_audio(WAVE_OUTPUT_FILENAME):
    """Record five seconds of mono 16 kHz microphone audio into a WAV file.

    Args:
        WAVE_OUTPUT_FILENAME: path of the WAV file to write.

    The capture parameters (16-bit signed, 1 channel, 16 kHz) match what
    the DeepSpeech model in this script expects.
    """
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000  # DeepSpeech 0.6 models are trained on 16 kHz audio
    RECORD_SECONDS = 5

    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    # Capture the sample width before terminate() tears PortAudio down.
    sample_width = p.get_sample_size(FORMAT)
    try:
        print("* recording")
        frames = [stream.read(CHUNK)
                  for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
        print("* done recording")
    finally:
        # Release the audio device even if a read fails mid-recording.
        stream.stop_stream()
        stream.close()
        p.terminate()

    # Context manager guarantees the file is closed on any error.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
def deepspeech_predict(WAVE_OUTPUT_FILENAME,
                       model_path='deepspeech-0.6.1-models/output_graph.pbmm',
                       beam_width=500):
    """Transcribe a WAV file with a DeepSpeech 0.6 acoustic model.

    Args:
        WAVE_OUTPUT_FILENAME: path of the 16 kHz mono WAV file to transcribe.
        model_path: path to the .pbmm model graph (defaults to the 0.6.1
            release layout used by this script).
        beam_width: decoder beam width passed to the Model constructor.

    Returns:
        The recognized text as a string.
    """
    # NOTE: the 0.6 API takes only (model_path, beam_width); the old
    # N_FEATURES / N_CONTEXT / LM_ALPHA / LM_BETA constants were unused
    # dead locals and have been removed.
    ds = Model(model_path, beam_width)
    # wav.read returns (sample_rate, int16 ndarray); ds.stt consumes the
    # raw samples.  Assumes the file really is 16 kHz mono — TODO confirm.
    fs, audio = wav.read(WAVE_OUTPUT_FILENAME)
    return ds.stt(audio)
if __name__ == '__main__':
    # Record a short clip, then print the DeepSpeech transcription of it.
    record_audio(WAVE_OUTPUT_FILENAME)
    print(deepspeech_predict(WAVE_OUTPUT_FILENAME))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment