Skip to content

Instantly share code, notes, and snippets.

@aidiary
Created November 9, 2023 12:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aidiary/1585c0208b428e4176f2eadc3098c368 to your computer and use it in GitHub Desktop.
Save aidiary/1585c0208b428e4176f2eadc3098c368 to your computer and use it in GitHub Desktop.
import struct
import threading
import time
import wave
import numpy as np
import pyaudio
from openai import OpenAI
# Audio capture settings shared by the recorder below.
# Number of frames requested per stream read (also passed as frames_per_buffer).
CHUNK = 1000
# Sample format: signed 16-bit integers.
FORMAT = pyaudio.paInt16
# Number of input channels (1 = mono).
CHANNELS = 1
# Sampling rate in Hz.
RATE = 48000
# NOTE(review): not referenced anywhere in the visible code; presumably a
# silence/level threshold on normalized samples -- TODO confirm or remove.
RMSE_THRESHOLD = 0.01
class RecordThread(threading.Thread):
    """Daemon thread that records audio from the default input device.

    When ``filename`` is given, every captured chunk is written to that WAV
    file. When ``filename`` is ``None``, the chunks are instead converted to
    normalized floats and appended to ``self.buffer``.

    Args:
        filename: Path of the WAV file to write, or ``None`` to keep the
            samples in memory only.
        verbose: Stored on the instance; not used by the recorder itself.
    """

    def __init__(self, filename, verbose=False):
        super().__init__()
        self.filename = filename
        self.stop_event = threading.Event()
        self.daemon = True
        self.verbose = verbose
        self.buffer = np.array([])

    def run(self):
        """Capture audio chunk by chunk until :meth:`stop` is called."""
        p = pyaudio.PyAudio()
        wf = None
        stream = None
        try:
            if self.filename is not None:
                wf = wave.open(self.filename, "wb")
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)

            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
            )

            while not self.stop_event.is_set():
                frames = stream.read(CHUNK)
                if wf is not None:
                    wf.writeframes(frames)
                else:
                    # Only decode the samples when they are actually kept.
                    self.buffer = np.concatenate(
                        (self.buffer, bytes2array(frames)), axis=None
                    )
        finally:
            # Release the device and finalize the audio file even if
            # recording failed part-way. ``wf`` only exists in file mode, so
            # closing it unconditionally would raise in buffer-only mode.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
            if wf is not None:
                wf.close()

    def stop(self):
        """Ask the recording loop to finish after the current chunk."""
        self.stop_event.set()
def bytes2array(frames):
    """Convert raw 16-bit PCM bytes to a float array scaled by 1/32768.

    Args:
        frames: Bytes holding signed 16-bit samples in native byte order.

    Returns:
        A float64 NumPy array with one value per sample, in [-1.0, 1.0).

    Raises:
        ValueError: If ``len(frames)`` is not a multiple of 2.
    """
    # Interpret the buffer directly as int16 and scale in one vectorized step
    # instead of unpacking and dividing sample by sample in Python.
    samples = np.frombuffer(frames, dtype=np.int16)
    return samples / 32768.0
def main():
    """Record speech and print its transcription, over and over.

    Each iteration starts a ``RecordThread`` writing to ``temp.wav``, waits
    for a line on standard input, stops the recording, sends the file to the
    OpenAI transcription endpoint (``whisper-1``) and prints the returned
    text. The loop has no exit condition of its own.
    """
    client = OpenAI()

    while True:
        record_thread = RecordThread("temp.wav")
        record_thread.start()
        if not record_thread.is_alive():
            # The recorder already exited; start over with a new thread.
            continue

        # Keep recording until the user submits a line of input.
        input()
        record_thread.stop()

        # Wait for the recorder to finish and close the audio file. Joining
        # is deterministic, unlike sleeping for a fixed interval.
        record_thread.join()

        # Process the recorded audio file here.
        with open("temp.wav", "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1", file=audio_file, response_format="text"
            ).strip()
        print(transcript)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment