Created
November 9, 2023 12:01
-
-
Save aidiary/1585c0208b428e4176f2eadc3098c368 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
import threading | |
import time | |
import wave | |
import numpy as np | |
import pyaudio | |
from openai import OpenAI | |
# Audio capture settings shared by the recorder thread.
CHUNK = 1000  # frames requested per stream read / frames_per_buffer
FORMAT = pyaudio.paInt16  # PyAudio sample format constant (16-bit integers)
CHANNELS = 1  # number of input channels
RATE = 48000  # sampling rate in Hz
RMSE_THRESHOLD = 0.01  # NOTE(review): not referenced by the visible code
class RecordThread(threading.Thread):
    """Background thread that records audio from the default input device.

    When ``filename`` is given, the captured frames are written to that WAV
    file. When ``filename`` is ``None``, the samples are instead converted
    with ``bytes2array`` and appended to ``self.buffer``.

    Attributes:
        filename: Path of the WAV file to write, or ``None`` for buffer mode.
        stop_event: Event that ends the recording loop once it is set.
        verbose: Stored flag; it is not consulted by this class itself.
        buffer: 1-D array of the samples captured so far (buffer mode only).
    """

    def __init__(self, filename, verbose=False):
        """Prepare the thread; recording starts when ``start()`` is called.

        Args:
            filename: Output WAV path, or ``None`` to keep samples in memory.
            verbose: Stored on the instance as ``self.verbose``.
        """
        super().__init__()
        self.filename = filename
        self.stop_event = threading.Event()
        # Daemon thread: it must not keep the interpreter alive on exit.
        self.daemon = True
        self.verbose = verbose
        self.buffer = np.array([])

    def run(self):
        """Capture audio chunk by chunk until ``stop()`` is called."""
        p = pyaudio.PyAudio()
        wf = None
        stream = None
        try:
            if self.filename is not None:
                wf = wave.open(self.filename, "wb")
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)

            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK,
            )

            while not self.stop_event.is_set():
                frames = stream.read(CHUNK)
                if wf is not None:
                    wf.writeframes(frames)
                else:
                    # Rebinding (rather than mutating) keeps ``self.buffer``
                    # a complete array for any reader at all times.
                    self.buffer = np.concatenate(
                        (self.buffer, bytes2array(frames)), axis=None
                    )
        finally:
            # Release the audio device and finalise the WAV file even when
            # opening the stream or reading from it raised.
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
                p.terminate()
            finally:
                # ``wf`` only exists in file mode; closing it unconditionally
                # used to crash buffer-only recordings on shutdown.
                if wf is not None:
                    wf.close()

    def stop(self):
        """Ask the recording loop to finish after the current chunk."""
        self.stop_event.set()
def bytes2array(frames):
    """Convert raw 16-bit PCM bytes into floats scaled by 1 / 32768.

    Args:
        frames: Bytes-like object of signed 16-bit samples in native byte
            order (the same layout the ``struct`` code ``"h"`` reads).

    Returns:
        A 1-D ``numpy.ndarray`` of ``float64`` with one value per sample,
        each in the range [-1.0, 1.0).

    Raises:
        ValueError: If the length of ``frames`` is not a multiple of two.
    """
    # Decode every sample in one vectorised pass instead of unpacking into a
    # tuple and dividing sample by sample in Python.
    samples = np.frombuffer(frames, dtype=np.int16)
    return samples / 32768.0
def main():
    """Record speech to ``temp.wav`` and print its transcript, repeatedly.

    Each iteration starts a ``RecordThread``, records until a line is read
    from standard input, stops the recorder, sends the finished WAV file to
    the OpenAI ``whisper-1`` transcription endpoint and prints the text.
    The loop has no exit condition of its own.
    """
    client = OpenAI()
    while True:
        record_thread = RecordThread("temp.wav")
        record_thread.start()
        if not record_thread.is_alive():
            # The recorder has already exited; start a fresh one.
            continue

        # Keep recording until the user submits a line of input.
        input()
        record_thread.stop()
        # Wait for the recorder to close the WAV file. Joining is
        # deterministic, whereas a fixed sleep could let the upload read a
        # half-written file.
        record_thread.join()

        # Process the recorded audio file here.
        with open("temp.wav", "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1", file=audio_file, response_format="text"
            ).strip()
        print(transcript)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment