Skip to content

Instantly share code, notes, and snippets.

@maail
Created June 12, 2023 04:57
Show Gist options
  • Save maail/fc492530cc567e859dd602e7c2bcb530 to your computer and use it in GitHub Desktop.
Save maail/fc492530cc567e859dd602e7c2bcb530 to your computer and use it in GitHub Desktop.
Transcribe Large Audio Files with Whisper
# Requires ffmpeg and its companion tool ffprobe to be installed and available on PATH.
import math
import os
import subprocess
from pathlib import Path
from tempfile import NamedTemporaryFile

import openai
async def _transcribe_audio(audio_file):
    """Split an uploaded audio file into chunks and run each through Whisper.

    The upload is written to a temporary ``.mp3`` file, probed with ffprobe
    for its bit rate and duration, cut into consecutive chunks with ffmpeg
    (written to ``uploaded_files/chunk_<n>.mp3``), and each chunk is sent to
    ``openai.Audio.translate("whisper-1", ...)``. The per-chunk texts are
    joined with single spaces. Chunk files are removed before returning,
    including when an error is raised.

    NOTE(review): despite the function name, this calls the *translate*
    endpoint, which per the OpenAI docs produces English text -- confirm
    that is the intended behavior.

    NOTE(review): the ffprobe/ffmpeg invocations and the OpenAI request are
    synchronous calls made from a coroutine, so they block the event loop
    while they run.

    Args:
        audio_file: Object exposing an awaitable ``read()`` that returns the
            raw audio bytes (presumably a framework upload object -- TODO
            confirm against the caller).

    Returns:
        The concatenated text of every chunk that was cut successfully. If
        ffmpeg fails part-way through, only the chunks produced before the
        failure are included.

    Raises:
        ValueError: If ffprobe cannot report a positive duration for the
            upload (for example, empty or unreadable audio).
        FileNotFoundError: If the ffprobe or ffmpeg executable is missing.
    """
    openai.api_key = OPEN_API_KEY
    contents = await audio_file.read()

    save_directory = Path("uploaded_files")
    save_directory.mkdir(exist_ok=True)

    # Target size per chunk; presumably chosen to stay under the API's
    # upload size limit -- verify against the current OpenAI documentation.
    desired_chunk_size_mb = 24

    file_paths = []
    try:
        with NamedTemporaryFile("wb+", delete=True, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(contents)
            # Flush so ffprobe/ffmpeg, which open the file by name, see all bytes.
            temp_audio_file.flush()

            audio_bitrate = _probe_format_number(temp_audio_file.name, "bit_rate")
            total_duration = _probe_format_number(temp_audio_file.name, "duration")
            if total_duration is None or total_duration <= 0:
                raise ValueError(
                    "could not determine the duration of the uploaded audio"
                )

            # NOTE(review): ffmpeg is invoked without stream copy, so chunks
            # are re-encoded and their size follows the encoder's bit rate,
            # not necessarily the source's -- confirm low-bit-rate inputs
            # still yield chunks below the intended size.
            chunk_size_seconds = _chunk_length_seconds(
                audio_bitrate, desired_chunk_size_mb
            )
            total_chunks = math.ceil(total_duration / chunk_size_seconds)

            for chunk_index in range(total_chunks):
                chunk_file_path = save_directory / f"chunk_{chunk_index}.mp3"
                start_time = chunk_index * chunk_size_seconds
                # Argument list (no shell) so paths with spaces are safe.
                # -y overwrites a stale chunk left by an earlier run instead
                # of prompting; -nostdin keeps ffmpeg from reading the
                # server's stdin.
                result = subprocess.run(
                    [
                        "ffmpeg",
                        "-nostdin",
                        "-y",
                        "-ss",
                        str(start_time),
                        "-t",
                        str(chunk_size_seconds),
                        "-i",
                        temp_audio_file.name,
                        str(chunk_file_path),
                    ],
                    check=False,
                )
                if result.returncode != 0:
                    # Drop the partial output and stop cutting; chunks made
                    # so far are still transcribed (best effort).
                    if chunk_file_path.exists():
                        chunk_file_path.unlink()
                    break
                file_paths.append(chunk_file_path)

        translated_chunks = []
        for i, chunk_file_path in enumerate(file_paths):
            print(f"chunk_{i}")
            with chunk_file_path.open("rb") as chunk_file:
                print("requesting whisper")
                chunk_transcript = openai.Audio.translate("whisper-1", chunk_file)
            chunk_text = _response_text(chunk_transcript)
            print(f"Chunk {i}: {chunk_text}")
            translated_chunks.append(chunk_text)

        return " ".join(translated_chunks)
    finally:
        # Always remove chunk files, even if probing, cutting or the API
        # request raised.
        for chunk_file_path in file_paths:
            if chunk_file_path.exists():
                chunk_file_path.unlink()


def _probe_format_number(file_path, entry):
    """Return ffprobe's ``format=<entry>`` value as a float, or None.

    None is returned when ffprobe prints nothing or a non-numeric value
    (such as ``N/A``) for the requested entry.
    """
    result = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            f"format={entry}",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )
    raw = result.stdout.decode("utf-8", errors="replace").strip()
    try:
        number = float(raw)
    except ValueError:
        return None
    return number if math.isfinite(number) else None


def _chunk_length_seconds(bitrate, max_chunk_mb, fallback_seconds=200):
    """Return how many seconds of audio at *bitrate* fit in *max_chunk_mb*.

    Falls back to *fallback_seconds* when the bit rate is unknown, and never
    returns less than one second so the caller can safely divide by it.
    """
    if bitrate is None or bitrate <= 0:
        return fallback_seconds
    max_bits = max_chunk_mb * 1024 * 1024 * 8
    return max(1, int(max_bits / bitrate))


def _response_text(response):
    """Return the transcript text from a Whisper API response.

    A plain string is returned unchanged; otherwise the ``"text"`` field of
    the response object is used.
    """
    if isinstance(response, str):
        return response
    return response["text"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment