Skip to content

Instantly share code, notes, and snippets.

@vleugelcomplement
Created May 14, 2021 22:57
Show Gist options
  • Save vleugelcomplement/58a015b1549440fdf63a6ee7fee3b7f0 to your computer and use it in GitHub Desktop.
Save vleugelcomplement/58a015b1549440fdf63a6ee7fee3b7f0 to your computer and use it in GitHub Desktop.
Transcribe an MP4 video with the Vosk speech recognition engine
#!/usr/bin/env python3
# see https://alphacephei.com/vosk
from vosk import Model, KaldiRecognizer, SetLogLevel
import argparse
import wave
import subprocess
import json
from pathlib import Path
SetLogLevel(0)
# Command-line interface: two required positional arguments, both parsed
# as pathlib.Path -- the video to transcribe and the Vosk model directory.
parser = argparse.ArgumentParser(
    epilog="модели можно скачать с https://alphacephei.com/vosk/models",
    description='извлекает текст из видео',
)
for arg_name, arg_help in (
    ("video", 'имя файла с видео'),
    ("model", 'каталог с языковой моделью'),
):
    parser.add_argument(arg_name, type=Path, help=arg_help)
args = parser.parse_args()
# Intermediate audio file and final transcript live next to the video,
# sharing its base name.
wavpath = args.video.with_suffix(".wav")
txtpath = args.video.with_suffix(".txt")

# Sample rate used both for the ffmpeg resampling step and for the recognizer.
sample_rate = 16000
model = Model(str(args.model))

# Extract the audio track as a mono WAV file at `sample_rate`.
# Paths are converted to str so the argument list is accepted on every
# platform/Python version, and check=True raises CalledProcessError when
# ffmpeg exits with a non-zero status instead of silently continuing with
# a missing or stale WAV file.
subprocess.run(
    [
        'ffmpeg', '-loglevel', 'quiet', '-i', str(args.video),
        '-ar', str(sample_rate),  # resample
        '-ac', '1',  # single channel
        '-f', 'wav', str(wavpath),
    ],
    check=True,
)
# Feed the converted WAV file to the recognizer and write the recognized
# text to the transcript file, one result per line.
rec = KaldiRecognizer(model, sample_rate)

# Both files are managed by `with` so they are closed (and the transcript
# flushed) even if recognition fails part-way. The transcript is written
# as UTF-8 explicitly: the platform default encoding may be unable to
# represent non-ASCII recognized text.
with wave.open(str(wavpath), "rb") as wf, \
        open(txtpath, "w", encoding="utf-8") as recognized:
    print(f"wav file with {wf.getnframes()} frames")

    # Number of frames requested per read. The whole file is requested at
    # once here; the value can be varied.
    # NOTE(review): a smaller chunk would lower peak memory and presumably
    # yields more intermediate results -- confirm against the Vosk docs.
    frames_per_read = wf.getnframes()

    # Empty before the first result, a newline afterwards, so results are
    # newline-separated without a leading blank line from this loop.
    separator = ""
    while True:
        data = wf.readframes(frames_per_read)
        if not data:
            break
        if rec.AcceptWaveform(data):
            jres = json.loads(rec.Result())
            recognized.write(separator + jres['text'])
            separator = "\n"

    # Whatever the recognizer still holds after the last read goes on its
    # own final line.
    jres = json.loads(rec.FinalResult())
    recognized.write("\n" + jres['text'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment