@jdittrich
Last active April 7, 2022 12:53
Vosk Speech API, modified test_simple.py: instead of returning a JSON object it prints lines of text (MM:SS,texttexttext) so you can use them in a transcript or for closed captions.
#!/usr/bin/env python3
# based on https://github.com/alphacep/vosk-api/blob/master/python/example/test_simple.py
# by alphacep under Apache 2.0
# modifications by Jan D.
# instead of returning a JSON object it prints lines of text: MM:SS,texttexttext
# so you can use them in a transcript or for closed captions
# (For the code below to work you need Python 3 and pipenv installed. If you do not have pipenv, run
# $ pip install --user pipenv
# first.)
#
# Lines that start with $ are shell commands: type the text after the $ into the terminal and press Enter.
# To get it running, create a folder, open a terminal in it, and run:
#
# $ pipenv install vosk
# $ pipenv shell
# $ git clone https://github.com/alphacep/vosk-api
# $ cd vosk-api/python/example
# $ wget https://alphacephei.com/kaldi/models/vosk-model-small-en-us-0.15.zip
# $ unzip vosk-model-small-en-us-0.15.zip
# $ mv vosk-model-small-en-us-0.15 model
# $ python3 ./test_simple.py test.wav
# (this runs the original example, to check that everything is set up correctly)
# $ touch test_transcript.py
#
# Then open test_transcript.py and paste the contents of this file into it.
# The audio you feed it (e.g. myaudiofile.wav) needs to be a mono WAV file, 16-bit PCM;
# use e.g. https://www.freac.org/ to create a copy in that format.
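# (Alternatively, if you happen to have ffmpeg installed, a command like the following
# should also produce a suitable file; the input name myrecording.mp3 is just an example:
# $ ffmpeg -i myrecording.mp3 -ac 1 -ar 16000 -c:a pcm_s16le myaudiofile.wav
# where -ac 1 makes it mono, -ar 16000 sets the sample rate, and pcm_s16le is 16-bit PCM.)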
#
# To write the transcribed text to a file, use
# $ python ./test_transcript.py myaudiofile.wav > mytranscription.md
# The > redirects the output into the file named after it.
# Replace myaudiofile.wav with the name of your audio file and mytranscription.md with the file you want the transcript in.
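#
# Each output line is the timestamp of the segment start, a comma, then the recognized text, e.g.:
# 1:05,this is the recognized text of one segment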
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
import json
import math
SetLogLevel(0)
if not os.path.exists("model"):
    print("Please download the model from https://alphacephei.com/vosk/models and unpack it as 'model' in the current folder.")
    exit(1)
wf = wave.open(sys.argv[1], "rb")
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
    print("Audio file must be WAV format mono PCM.")
    exit(1)
model = Model("model")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetWords(True)
while True:
    data = wf.readframes(4000)
    if len(data) == 0:
        break
    if rec.AcceptWaveform(data):
        resultsjson = json.loads(rec.Result())
        if "result" in resultsjson and "text" in resultsjson:
            # start time (in seconds) of the first word of this segment
            starttime = int(resultsjson["result"][0]["start"])
            timemin = math.floor(starttime / 60)
            timesek = starttime % 60
            # seconds are zero-padded, so a segment starting at 65 s prints as 1:05
            print(f"{timemin}:{timesek:02d},{resultsjson['text']}")
        # else:
        #     print(rec.PartialResult())

# audio still buffered at the end of the file is not printed above;
# uncomment the next line to see it:
# print(rec.FinalResult())
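The minutes/seconds arithmetic in the loop can be checked in isolation. A minimal sketch; the helper name mmss and the zero-padding of the seconds are my additions for illustration, not part of the Vosk API:

```python
import math

def mmss(start):
    # start: segment start in seconds, as a float (Vosk reports word times this way).
    whole = math.floor(start)  # drop the fractional part
    minutes = whole // 60      # whole minutes
    seconds = whole % 60       # leftover seconds, zero-padded below
    return f"{minutes}:{seconds:02d}"

print(mmss(65.3))  # a segment starting 65.3 s into the audio gets the prefix 1:05
```

A segment starting at 600 s would accordingly be prefixed 10:00.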