Splitting audio files with python and transcribing with OpenAI
"""
This module splits an audio file into multiple segments based on a maximum segment length.
The default maximum segment length is 15 minutes.
Output format is m4a.
Usage: `python split.py <path_to_audio_file>`
Your environment must meet the following requirements:
- pydub Python package installed
- FFmpeg installed on your platform and available on the PATH
  - Windows: `winget install ffmpeg`
  - MacOS: `brew install ffmpeg`
  - Linux: `sudo apt-get install ffmpeg`
"""
import sys
import os
import math

from pydub import AudioSegment


def split_audio(file_path, segment_length=15 * 60 * 1000):  # 15 minutes in milliseconds
    # Load the audio file
    audio = AudioSegment.from_file(file_path)
    # Get the total length of the audio file
    total_length = len(audio)
    # Calculate the number of segments needed
    num_segments = math.ceil(total_length / segment_length)
    # Loop through and create each segment
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min((i + 1) * segment_length, total_length)  # Ensure the last segment does not exceed total length
        segment = audio[start_time:end_time]
        # Generate the output file name (strip the extension, append the part number)
        output_file = f"{os.path.splitext(file_path)[0]}_part{i + 1}.m4a"
        # Export the segment as an m4a file ("ipod" is the FFmpeg muxer for the m4a container)
        segment.export(output_file, format="ipod")  # see https://github.com/jiaaro/pydub/issues/755
        print(f"Exported: {output_file}")


source_path = os.path.abspath(sys.argv[1])
split_audio(source_path)
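
The segment_length parameter is in milliseconds, so a different maximum length is just a matter of passing a different value. A minimal sketch, assuming the split_audio function above is in scope and using a hypothetical file name:

# 10-minute segments instead of the default 15 minutes
split_audio("recordings/meeting.m4a", segment_length=10 * 60 * 1000)
# -> recordings/meeting_part1.m4a, recordings/meeting_part2.m4a, ...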
"""
This module reads audio files and transcribes them using OpenAI's API.
Usage: `python transcribe.py "<input_wildcard>"`
Where input_wildcard is a wildcard pattern that matches the audio files to be transcribed
(quote it so the shell does not expand it before the script sees it).
For example, if the audio files are named "test/audio1.m4a", "test/audio2.m4a", etc.,
you can use the wildcard "test/audio*.m4a" to transcribe all the files.
Each transcription is saved to a text file with the same name as the audio file, but
with a .txt extension.
All the text files are then combined into a single output file named "output_file.txt".
Your environment must meet the following requirements:
- OpenAI Python package installed
- OPENAI_API_KEY environment variable set to your OpenAI API key
"""
import os
import sys
import glob
import shutil

from openai import OpenAI

client = OpenAI()

input_wildcard = sys.argv[1]
# Sort the matches so the parts are concatenated in a stable (lexicographic) order
input_files = sorted(glob.glob(input_wildcard))
print(f"Transcribing from {input_files}")

output_files = []
for x in input_files:
    print("Transcribing file " + x)
    # Open the audio file and send it to the Whisper transcription endpoint
    with open(x, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
            language="en"
        )
    # Write the plain-text transcript next to the audio file
    output_file = f"{os.path.splitext(x)[0]}.txt"
    output_files.append(output_file)
    with open(output_file, "a", encoding="utf-8") as f:
        print(transcription, file=f)
    print(f"Transcription written to: {output_file}")

print(f"Outputs are {output_files}")

# Concatenate all per-file transcripts into a single file in the same directory
concat_file = os.path.join(os.path.dirname(output_files[0]) or ".", "output_file.txt")
with open(concat_file, 'wb') as wfd:
    for f in output_files:
        with open(f, 'rb') as fd:
            shutil.copyfileobj(fd, wfd)
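
Putting the two scripts together (the file and directory names here are hypothetical): split a long recording first, then pass the resulting parts to the transcriber with a quoted wildcard, for example:

`python split.py talks/interview.m4a`
`python transcribe.py "talks/interview_part*.m4a"`

This leaves one .txt transcript per audio part plus the combined talks/output_file.txt.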