# maail/transcribe_audio.py

## transcribe_audio.py
# Requires ffmpeg and ffprobe to be installed and available on PATH.

import os
from tempfile import NamedTemporaryFile
import subprocess
import math
import openai

async def _transcribe_audio(audio_file):
    """Translate an uploaded audio file to text via the Whisper API.

    The upload is written to a temporary ``.mp3`` file and split with ffmpeg
    into chunks whose size is derived from the bitrate reported by ffprobe
    (targeting about 24 MB per chunk, presumably to stay below the API's
    upload limit). Each chunk is sent to ``openai.Audio.translate`` and the
    resulting texts are joined with single spaces.

    Chunk files are written to ``uploaded_files/chunk_<n>.mp3`` and removed
    before returning, including when a request fails part-way through.

    NOTE(review): ``OPEN_API_KEY`` is not defined in the visible part of this
    module; it is assumed to be provided at module scope -- confirm.

    Args:
        audio_file: Object with an awaitable ``read()`` returning the raw
            audio bytes.

    Returns:
        The translated text of all chunks joined by spaces, or ``""`` when
        the duration cannot be determined or no chunk could be produced.
    """
    # Imported locally because the module's import block does not bring
    # ``Path`` into scope.
    from pathlib import Path

    openai.api_key = OPEN_API_KEY

    contents = await audio_file.read()

    save_directory = Path("uploaded_files")
    save_directory.mkdir(exist_ok=True)

    def probe_format_entry(file_path, entry, cast):
        """Return ffprobe's format-level *entry* converted by *cast*, or None."""
        result = subprocess.run(
            [
                "ffprobe",
                "-v", "error",
                "-show_entries", f"format={entry}",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(file_path),
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        raw = result.stdout.decode(errors="replace").strip()
        if not raw:
            return None
        try:
            return cast(raw)
        except ValueError:
            # ffprobe prints "N/A" when the container lacks the value.
            return None

    translated_chunks = []

    with NamedTemporaryFile("wb+", delete=True, suffix=".mp3") as temp_audio_file:
        temp_audio_file.write(contents)
        # Flush so ffprobe/ffmpeg, which reopen the file by name, see the data.
        temp_audio_file.flush()

        audio_bitrate = probe_format_entry(temp_audio_file.name, "bit_rate", int)
        total_duration = probe_format_entry(temp_audio_file.name, "duration", float)
        desired_chunk_size_mb = 24

        if not total_duration or total_duration <= 0:
            # Empty or unreadable audio: nothing to split, so nothing to translate.
            return ""

        if audio_bitrate and audio_bitrate > 0:
            # bits available per chunk / bits per second = seconds per chunk;
            # clamp to 1 so an extreme bitrate cannot yield a zero-length chunk.
            chunk_size_seconds = max(
                1, int((desired_chunk_size_mb * 1024 * 1024 * 8) / audio_bitrate)
            )
        else:
            chunk_size_seconds = 200

        total_chunks = math.ceil(total_duration / chunk_size_seconds)

        file_paths = []
        try:
            for chunk_index in range(total_chunks):
                chunk_file_path = save_directory / f"chunk_{chunk_index}.mp3"
                start_time = chunk_index * chunk_size_seconds
                # Argument list (no shell) avoids quoting problems; -y overwrites
                # a stale chunk file instead of prompting on stdin.
                exit_code = subprocess.run(
                    [
                        "ffmpeg",
                        "-y",
                        "-ss", str(start_time),
                        "-t", str(chunk_size_seconds),
                        "-i", temp_audio_file.name,
                        str(chunk_file_path),
                    ]
                ).returncode

                if exit_code != 0:
                    # Drop the partial output and translate what was produced so far.
                    if chunk_file_path.exists():
                        chunk_file_path.unlink()
                    break

                file_paths.append(chunk_file_path)

            for i, chunk_file_path in enumerate(file_paths):
                print(f"chunk_{i}")
                with chunk_file_path.open("rb") as chunk_file:
                    print("requesting whisper")
                    chunk_transcript = openai.Audio.translate("whisper-1", chunk_file)
                print(chunk_transcript)
                print(f"Chunk {i}: {chunk_transcript}")
                # The default response is a mapping with a "text" field; a plain
                # string is also accepted in case a text response format is used.
                if isinstance(chunk_transcript, str):
                    translated_chunks.append(chunk_transcript)
                else:
                    translated_chunks.append(chunk_transcript["text"])
        finally:
            # Always remove chunk files, even when a Whisper request raises.
            for chunk_file_path in file_paths:
                if chunk_file_path.exists():
                    chunk_file_path.unlink()

    return " ".join(translated_chunks)
	#requires ffmpeg to be installed

	import os
	from tempfile import NamedTemporaryFile
	import subprocess
	import math
	import openai

	async def _transcribe_audio(audio_file):
		"""Translate an uploaded audio file to text via the Whisper API.

		The upload is written to a temporary ``.mp3`` file and split with ffmpeg
		into chunks whose size is derived from the bitrate reported by ffprobe
		(targeting about 24 MB per chunk, presumably to stay below the API's
		upload limit). Each chunk is sent to ``openai.Audio.translate`` and the
		resulting texts are joined with single spaces.

		Chunk files are written to ``uploaded_files/chunk_<n>.mp3`` and removed
		before returning, including when a request fails part-way through.

		NOTE(review): ``OPEN_API_KEY`` is not defined in the visible part of this
		module; it is assumed to be provided at module scope -- confirm.

		Args:
			audio_file: Object with an awaitable ``read()`` returning the raw
				audio bytes.

		Returns:
			The translated text of all chunks joined by spaces, or ``""`` when
			the duration cannot be determined or no chunk could be produced.
		"""
		# Imported locally because the module's import block does not bring
		# ``Path`` into scope.
		from pathlib import Path

		openai.api_key = OPEN_API_KEY

		contents = await audio_file.read()

		save_directory = Path("uploaded_files")
		save_directory.mkdir(exist_ok=True)

		def probe_format_entry(file_path, entry, cast):
			"""Return ffprobe's format-level *entry* converted by *cast*, or None."""
			result = subprocess.run(
				[
					"ffprobe",
					"-v", "error",
					"-show_entries", f"format={entry}",
					"-of", "default=noprint_wrappers=1:nokey=1",
					str(file_path),
				],
				stdout=subprocess.PIPE,
				stderr=subprocess.PIPE,
			)
			raw = result.stdout.decode(errors="replace").strip()
			if not raw:
				return None
			try:
				return cast(raw)
			except ValueError:
				# ffprobe prints "N/A" when the container lacks the value.
				return None

		translated_chunks = []

		with NamedTemporaryFile("wb+", delete=True, suffix=".mp3") as temp_audio_file:
			temp_audio_file.write(contents)
			# Flush so ffprobe/ffmpeg, which reopen the file by name, see the data.
			temp_audio_file.flush()

			audio_bitrate = probe_format_entry(temp_audio_file.name, "bit_rate", int)
			total_duration = probe_format_entry(temp_audio_file.name, "duration", float)
			desired_chunk_size_mb = 24

			if not total_duration or total_duration <= 0:
				# Empty or unreadable audio: nothing to split, so nothing to translate.
				return ""

			if audio_bitrate and audio_bitrate > 0:
				# bits available per chunk / bits per second = seconds per chunk;
				# clamp to 1 so an extreme bitrate cannot yield a zero-length chunk.
				chunk_size_seconds = max(
					1, int((desired_chunk_size_mb * 1024 * 1024 * 8) / audio_bitrate)
				)
			else:
				chunk_size_seconds = 200

			total_chunks = math.ceil(total_duration / chunk_size_seconds)

			file_paths = []
			try:
				for chunk_index in range(total_chunks):
					chunk_file_path = save_directory / f"chunk_{chunk_index}.mp3"
					start_time = chunk_index * chunk_size_seconds
					# Argument list (no shell) avoids quoting problems; -y overwrites
					# a stale chunk file instead of prompting on stdin.
					exit_code = subprocess.run(
						[
							"ffmpeg",
							"-y",
							"-ss", str(start_time),
							"-t", str(chunk_size_seconds),
							"-i", temp_audio_file.name,
							str(chunk_file_path),
						]
					).returncode

					if exit_code != 0:
						# Drop the partial output and translate what was produced so far.
						if chunk_file_path.exists():
							chunk_file_path.unlink()
						break

					file_paths.append(chunk_file_path)

				for i, chunk_file_path in enumerate(file_paths):
					print(f"chunk_{i}")
					with chunk_file_path.open("rb") as chunk_file:
						print("requesting whisper")
						chunk_transcript = openai.Audio.translate("whisper-1", chunk_file)
					print(chunk_transcript)
					print(f"Chunk {i}: {chunk_transcript}")
					# The default response is a mapping with a "text" field; a plain
					# string is also accepted in case a text response format is used.
					if isinstance(chunk_transcript, str):
						translated_chunks.append(chunk_transcript)
					else:
						translated_chunks.append(chunk_transcript["text"])
			finally:
				# Always remove chunk files, even when a Whisper request raises.
				for chunk_file_path in file_paths:
					if chunk_file_path.exists():
						chunk_file_path.unlink()

		return " ".join(translated_chunks)