Skip to content

Instantly share code, notes, and snippets.

@arctic-hen7
Created July 17, 2023 01:12
Show Gist options
  • Save arctic-hen7/5580d5452f77d4e6b206caf90f8d73c6 to your computer and use it in GitHub Desktop.
Save arctic-hen7/5580d5452f77d4e6b206caf90f8d73c6 to your computer and use it in GitHub Desktop.
A Python script that records audio from the microphone into a temporary file until the user presses Enter, and that then transcribes that audio using Whisper. A plug-and-play way of adding voice non-real-time voice command to Python apps!
import whisper
import pyaudio
import wave
import sys
import tempfile
from ctypes import *
# Load the Whisper model once
model = whisper.load_model("base.en")
# Records audio directly from the microphone and then transcribes it to text using Whisper, returning that transcription.
def transcribe_directly():
# Create a temporary file to store the recorded audio (this will be deleted once we've finished transcription)
temp_file = tempfile.NamedTemporaryFile(suffix=".wav")
sample_rate = 16000
bits_per_sample = 16
chunk_size = 1024
audio_format = pyaudio.paInt16
channels = 1
def callback(in_data, frame_count, time_info, status):
wav_file.writeframes(in_data)
return None, pyaudio.paContinue
# Open the wave file for writing
wav_file = wave.open(temp_file.name, 'wb')
wav_file.setnchannels(channels)
wav_file.setsampwidth(bits_per_sample // 8)
wav_file.setframerate(sample_rate)
# Suppress ALSA warnings (https://stackoverflow.com/a/13453192)
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
def py_error_handler(filename, line, function, err, fmt):
return
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
asound = cdll.LoadLibrary('libasound.so')
asound.snd_lib_error_set_handler(c_error_handler)
# Initialize PyAudio
audio = pyaudio.PyAudio()
# Start recording audio
stream = audio.open(format=audio_format,
channels=channels,
rate=sample_rate,
input=True,
frames_per_buffer=chunk_size,
stream_callback=callback)
input("Press Enter to stop recording...")
# Stop and close the audio stream
stream.stop_stream()
stream.close()
audio.terminate()
# Close the wave file
wav_file.close()
# And transcribe the audio to text (suppressing warnings about running on a CPU)
result = model.transcribe(temp_file.name, fp16=False)
temp_file.close()
return result["text"].strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment