Skip to content

Instantly share code, notes, and snippets.

@seidler2547
Created July 1, 2021 18:20
Show Gist options
  • Save seidler2547/0ebfd65f858695e97d7f6f070f3d1425 to your computer and use it in GitHub Desktop.
Save seidler2547/0ebfd65f858695e97d7f6f070f3d1425 to your computer and use it in GitHub Desktop.
Speech to text from mp3/mp4/url using vosk+ffmpeg
#!/usr/bin/env python3
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
import subprocess
import json
# Transcribe the audio of a media file (or URL) given as the first
# command-line argument and print the recognized text to stdout.
#
# ffmpeg decodes the input to raw mono signed 16-bit little-endian PCM at
# `sample_rate` Hz on its stdout; that stream is fed chunk by chunk into a
# Vosk KaldiRecognizer, and each finalized segment is printed as soon as it
# is available. Diagnostics go to stderr so stdout holds only the transcript.
SetLogLevel(0)

# Fail with a usage hint instead of an IndexError when no input is given.
if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} <audio/video file or URL>", file=sys.stderr)
    sys.exit(2)

if not os.path.exists("model"):
    # sys.exit(str) writes the message to stderr and exits with status 1.
    # (The bare `exit` builtin comes from the `site` module and is not
    # guaranteed to exist, e.g. under `python -S`.)
    sys.exit("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")

sample_rate = 16000
# One second of audio per read: mono ('-ac 1'), 2 bytes per sample ('s16le').
chunk_bytes = sample_rate * 2

model = Model("model")
rec = KaldiRecognizer(model, sample_rate)

ffmpeg_cmd = [
    'ffmpeg', '-loglevel', 'quiet', '-i',
    sys.argv[1],
    '-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-',
]
try:
    process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE)
except FileNotFoundError:
    sys.exit("ffmpeg executable not found; please install ffmpeg and make sure it is on PATH.")

# The context manager closes the pipe and waits for ffmpeg on exit, so the
# child is always reaped and its return code is available afterwards.
with process:
    # iter() with a sentinel stops at the empty bytes object read() returns
    # at end of stream.
    for data in iter(lambda: process.stdout.read(chunk_bytes), b""):
        # AcceptWaveform returns a truthy value once a segment is finalized;
        # only then is there a complete result to print.
        if rec.AcceptWaveform(data):
            res = json.loads(rec.Result())
            print(res['text'], end=' ', flush=True)

# Flush whatever the recognizer still holds after the stream ended.
res = json.loads(rec.FinalResult())
print(res['text'])

# ffmpeg runs with '-loglevel quiet', so a decode failure would otherwise look
# like a successful run with an empty transcript.
if process.returncode != 0:
    sys.exit(f"ffmpeg exited with status {process.returncode}; the input may be missing or unreadable.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment