# HimbeersaftLP/deepspeech_test.py

## deepspeech_test.py
import sys
import deepspeech as ds

# https://deepspeech.readthedocs.io/en/v0.9.3/Python-Examples.html

# English and Chinese: https://github.com/mozilla/DeepSpeech/releases
# German: https://github.com/AASHISHAG/deepspeech-german#trained-models

# Load the DeepSpeech model from a file path relative to the working directory.
print("Loading model...")
model = ds.Model("deepspeech-0.9.3-models.pbmm")
# German alternative (swap with the line above):
# model = ds.Model("deepspeech-german-0.9.0-output_graph.pbmm")

# Enable the external scorer file that belongs to the model chosen above.
print("Loading scorer...")
model.enableExternalScorer("deepspeech-0.9.3-models.scorer")
# German alternative (swap with the line above):
# model.enableExternalScorer("deepspeech-german-0.9.0-kenlm.scorer")

# Sample rate in Hz: used for the recording below and required of WAV input.
fs = 16000

# Without a command-line argument, record from the default input device;
# otherwise the first argument is treated as the path of a WAV file.
if (len(sys.argv) < 2):
    # https://python-sounddevice.readthedocs.io/en/latest/usage.html#recording
    import sounddevice as sd
    duration = 5 # seconds
    print("No wav file provided, recording now...")
    # Request duration * fs frames of one channel as 16-bit integers.
    audioData = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="int16")
    # Presumably blocks until the recording is complete -- see the
    # sounddevice usage page linked above.
    sd.wait()
    # Keep element 0 of every recorded frame so the result is a flat list of
    # samples rather than a sequence of one-element rows.
    audioData = list(map(lambda channels : channels[0], audioData))
    print("Done recording!")
else:
    # https://stackoverflow.com/a/65378537/
    # wave is only needed on this branch, so it is imported here.
    import wave
    def read_wav(path):
        """Read the first channel of a WAV file as a list of integers.

        All audio data is fetched with a single ``readframes`` call and then
        decoded one frame at a time. Only whole frames are decoded, so a file
        whose data is shorter than its header claims yields fewer values
        rather than padded ones.

        Args:
            path: Path of the WAV file, passed to ``wave.open``.

        Returns:
            A ``(values, framerate, bits)`` tuple. ``values`` holds one
            integer per frame, taken from the first channel; ``framerate`` is
            the sample rate stored in the file; ``bits`` is the sample width
            in bits.

        Raises:
            wave.Error: Raised by ``wave.open`` when the file is not a WAV
                file it can parse (see the ``wave`` module documentation).
        """
        with wave.open(path, "rb") as wav:
            nchannels = wav.getnchannels()
            sampwidth = wav.getsampwidth()
            framerate = wav.getframerate()
            # One bulk read instead of a readframes(1) call per frame.
            raw = wav.readframes(wav.getnframes())

        signed = sampwidth > 1  # 8 bit wavs are unsigned
        byteorder = sys.byteorder  # wave module uses sys.byteorder for bytes
        frame_size = nchannels * sampwidth  # bytes per frame, all channels
        frame_count = len(raw) // frame_size  # ignore a partial trailing frame

        # The first channel's sample occupies the first `sampwidth` bytes of
        # each frame, so step through the buffer one frame at a time.
        values = [
            int.from_bytes(raw[start:start + sampwidth], byteorder, signed=signed)
            for start in range(0, frame_count * frame_size, frame_size)
        ]
        return values, framerate, sampwidth * 8
    # Load the first-channel samples of the WAV file named by the first
    # command-line argument, together with its sample rate and bit depth.
    audioData, framerate, bits = read_wav(sys.argv[1])
    # Reject files whose sample rate differs from fs; exit with status 1.
    if (framerate != fs):
        print("Must be 16khz!")
        sys.exit(1)
    # Reject files that are not 16 bits per sample; exit with status 1.
    if (bits != 16):
        print("Must be 16bit!")
        sys.exit(1)


# Pass the samples (recorded or read from file) to the model and print what
# model.stt returns -- the transcription, per the DeepSpeech examples linked
# at the top of the file.
print("Processing...")
print(model.stt(audioData))
	# NOTE(review): this tab-indented section repeats the script above; its
	# nesting had been flattened and is restored here. Confirm whether the
	# duplicate is still needed.
	import sys
	import deepspeech as ds

	# https://deepspeech.readthedocs.io/en/v0.9.3/Python-Examples.html

	# English and Chinese: https://github.com/mozilla/DeepSpeech/releases
	# German: https://github.com/AASHISHAG/deepspeech-german#trained-models

	print("Loading model...")
	model = ds.Model("deepspeech-0.9.3-models.pbmm")
	# model = ds.Model("deepspeech-german-0.9.0-output_graph.pbmm")

	print("Loading scorer...")
	model.enableExternalScorer("deepspeech-0.9.3-models.scorer")
	# model.enableExternalScorer("deepspeech-german-0.9.0-kenlm.scorer")

	# Sample rate in Hz: used for the recording and required of WAV input.
	fs = 16000

	# Without a command-line argument, record from the default input device;
	# otherwise the first argument is treated as the path of a WAV file.
	if len(sys.argv) < 2:
	    # https://python-sounddevice.readthedocs.io/en/latest/usage.html#recording
	    import sounddevice as sd
	    duration = 5  # seconds
	    print("No wav file provided, recording now...")
	    # Request duration * fs frames of one channel as 16-bit integers.
	    audioData = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="int16")
	    sd.wait()
	    # Keep element 0 of every recorded frame to get a flat list of samples.
	    audioData = [frame[0] for frame in audioData]
	    print("Done recording!")
	else:
	    # https://stackoverflow.com/a/65378537/
	    import wave

	    def read_wav(path):
	        """Read the first channel of a WAV file as a list of integers.

	        Args:
	            path: Path of the WAV file, passed to ``wave.open``.

	        Returns:
	            A ``(values, framerate, bits)`` tuple: one integer per whole
	            frame from the first channel, the file's sample rate, and the
	            sample width in bits.
	        """
	        with wave.open(path, "rb") as wav:
	            nchannels = wav.getnchannels()
	            sampwidth = wav.getsampwidth()
	            framerate = wav.getframerate()
	            # One bulk read instead of a readframes(1) call per frame.
	            raw = wav.readframes(wav.getnframes())

	        signed = sampwidth > 1  # 8 bit wavs are unsigned
	        byteorder = sys.byteorder  # wave module uses sys.byteorder for bytes
	        frame_size = nchannels * sampwidth  # bytes per frame, all channels
	        frame_count = len(raw) // frame_size  # ignore a partial trailing frame

	        # The first channel's sample is the first `sampwidth` bytes of a frame.
	        values = [
	            int.from_bytes(raw[start:start + sampwidth], byteorder, signed=signed)
	            for start in range(0, frame_count * frame_size, frame_size)
	        ]
	        return values, framerate, sampwidth * 8

	    audioData, framerate, bits = read_wav(sys.argv[1])
	    # Reject files whose sample rate differs from fs; exit with status 1.
	    if framerate != fs:
	        print("Must be 16khz!")
	        sys.exit(1)
	    # Reject files that are not 16 bits per sample; exit with status 1.
	    if bits != 16:
	        print("Must be 16bit!")
	        sys.exit(1)


	print("Processing...")
	print(model.stt(audioData))