Splitting audio files with Python and transcribing with OpenAI
""" | |
This module splits an audio file into multiple segments based on maximum segment length | |
The default maximum segment length is 15 minutes. | |
Output format is m4a | |
Usage: `python split.py <path_to_audio_file>` | |
Your environment must meet the following requirements: | |
- pydub Python package installed | |
- FFMPEG installed on your platform in the path | |
- Windows `winget install ffmpeg` | |
- MacOS `brew install ffmpeg` | |
- Linux `sudo apt-get install ffmpeg` | |
""" | |
import sys | |
import os | |
from pydub import AudioSegment | |
import math | |
def split_audio(file_path, segment_length=15*60*1000): # 15 minutes in milliseconds | |
# Load the audio file | |
audio = AudioSegment.from_file(file_path) | |
# Get the total length of the audio file | |
total_length = len(audio) | |
# Calculate the number of segments needed | |
num_segments = math.ceil(total_length / segment_length) | |
# Loop through and create each segment | |
for i in range(num_segments): | |
start_time = i * segment_length | |
end_time = min((i + 1) * segment_length, total_length) # Ensure the last segment does not exceed total length | |
segment = audio[start_time:end_time] | |
# Generate the output file name | |
output_file = f"{file_path[:-4]}_part{i+1}.m4a" | |
# Export the segment as an m4a file | |
segment.export(output_file, format="ipod") # see https://github.com/jiaaro/pydub/issues/755 | |
print(f"Exported: {output_file}") | |
source_path = os.path.abspath(sys.argv[1]) | |
split_audio(source_path) |
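Splitting matters because OpenAI's transcription endpoint caps the size of each uploaded file (25 MB at the time of writing), and 15-minute m4a chunks stay comfortably under that. If you want to reuse the splitter from another script rather than from the command line, here is a minimal sketch, assuming the file above is saved as split.py in the same directory; the input name lecture.m4a is just a placeholder:

from split import split_audio

# Split into 10-minute chunks instead of the default 15 minutes
# (segment_length is in milliseconds, matching the function above).
split_audio("lecture.m4a", segment_length=10 * 60 * 1000)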
""" | |
This module reads audio files and transcribes them using OpenAI's API. | |
Usage: python transcribe <input_wildcard> | |
Where input_wildcard is a wildcard pattern that matches the audio files to be transcribed. | |
For example if the audio files are named "test/audio1.m4a", "test/audio2.m4a", etc., | |
you can use the wildcard "test/audio*.m4a" to transcribe all the files. | |
Each transcription is saved to a text file with the same name as the audio file, but | |
with a .txt extension. | |
All the text files are then combined into a single output file named "output_file.txt". | |
Your environment must meet the following requirements: | |
- OpenAI Python package installed | |
- set OPENAI_API_KEY environment variable to your OpenAI API key | |
""" | |
import os | |
import sys | |
import glob | |
import shutil | |
from openai import OpenAI | |
client = OpenAI() | |
input_wildcard = sys.argv[1] | |
input_files = glob.glob(input_wildcard) | |
print(f"Transcribing from{input_files}") | |
output_files = [] | |
for x in input_files: | |
audio_file= open(x, "rb") | |
print("Transcribing file " + x) | |
transcription = client.audio.transcriptions.create( | |
model="whisper-1", | |
file=audio_file, | |
response_format="text", | |
language="en" | |
) | |
output_file = f"{x[:-4]}.txt" | |
output_files.append(output_file) | |
with open(output_file, "a", encoding="utf-8") as f: | |
print(transcription, file=f) | |
print(f"Transcription written to: {output_file}") | |
print(f"Outputs are {output_files}") | |
concat_file = os.path.dirname(output_files[0]) + "/output_file.txt" | |
with open(concat_file,'wb') as wfd: | |
for f in output_files: | |
with open(f,'rb') as fd: | |
shutil.copyfileobj(fd, wfd) |
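To run the whole pipeline in one go, a possible orchestration sketch follows, assuming the two files above are saved as split.py and transcribe.py (the names the usage strings suggest) and invoked from the same directory:

import os
import subprocess
import sys

source = sys.argv[1]  # e.g. "talk.m4a"
stem = os.path.splitext(source)[0]

# Step 1: split the recording into 15-minute m4a chunks named <stem>_partN.m4a
subprocess.run([sys.executable, "split.py", source], check=True)

# Step 2: transcribe every chunk and concatenate the transcripts.
# The pattern is passed unexpanded; transcribe.py globs it itself.
subprocess.run([sys.executable, "transcribe.py", f"{stem}_part*.m4a"], check=True)

The combined transcript ends up as output_file.txt next to the chunk files, per the concatenation step in transcribe.py.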