synesthesia/split.py

## split.py
"""
This module splits an audio file into multiple segments based on maximum segment length

The default maximum segment length is 15 minutes.

Output format is m4a

Usage: `python split.py <path_to_audio_file>`

Your environment must meet the following requirements:
  - pydub Python package installed
  - FFMPEG installed on your platform in the path
    - Windows `winget install ffmpeg`
    - MacOS `brew install ffmpeg`
    - Linux `sudo apt-get install ffmpeg`
"""

import sys
import os
from pydub import AudioSegment
import math

def split_audio(file_path, segment_length=15*60*1000):  # 15 minutes in milliseconds
    # Load the audio file
    audio = AudioSegment.from_file(file_path)

    # Get the total length of the audio file
    total_length = len(audio)

    # Calculate the number of segments needed
    num_segments = math.ceil(total_length / segment_length)

    # Loop through and create each segment
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min((i + 1) * segment_length, total_length)  # Ensure the last segment does not exceed total length
        segment = audio[start_time:end_time]

        # Generate the output file name
        output_file = f"{file_path[:-4]}_part{i+1}.m4a"

        # Export the segment as an m4a file
        segment.export(output_file, format="ipod") # see https://github.com/jiaaro/pydub/issues/755
        print(f"Exported: {output_file}")


source_path = os.path.abspath(sys.argv[1])
split_audio(source_path)

## transcribe.py
"""
This module reads audio files and transcribes them using OpenAI's API.

Usage:  python transcribe <input_wildcard>

Where input_wildcard is a wildcard pattern that matches the audio files to be transcribed.

For example if the audio files are named "test/audio1.m4a", "test/audio2.m4a", etc.,
    you can use the wildcard "test/audio*.m4a" to transcribe all the files.

Each transcription is saved to a text file with the same name as the audio file, but
with a .txt extension.

All the text files are then combined into a single output file named "output_file.txt".

Your environment must meet the following requirements:
  - OpenAI Python package installed
  - set OPENAI_API_KEY environment variable to your OpenAI API key


"""
import os
import sys
import glob
import shutil
from openai import OpenAI

client = OpenAI()

input_wildcard = sys.argv[1]
input_files = glob.glob(input_wildcard)
print(f"Transcribing from{input_files}")
output_files = []

for x in input_files:
    audio_file= open(x, "rb")
    print("Transcribing file " + x)
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        response_format="text",
        language="en"
    )
    output_file = f"{x[:-4]}.txt"
    output_files.append(output_file)
    with open(output_file, "a", encoding="utf-8") as f:
        print(transcription, file=f)
        print(f"Transcription written to: {output_file}")

print(f"Outputs are {output_files}")
concat_file = os.path.dirname(output_files[0]) + "/output_file.txt"

with open(concat_file,'wb') as wfd:
    for f in output_files:
        with open(f,'rb') as fd:
            shutil.copyfileobj(fd, wfd)
	"""
	This module splits an audio file into multiple segments based on maximum segment length

	The default maximum segment length is 15 minutes.

	Output format is m4a

	Usage: `python split.py <path_to_audio_file>`

	Your environment must meet the following requirements:
	- pydub Python package installed
	- FFMPEG installed on your platform in the path
	- Windows `winget install ffmpeg`
	- MacOS `brew install ffmpeg`
	- Linux `sudo apt-get install ffmpeg`
	"""

	import sys
	import os
	from pydub import AudioSegment
	import math

	def split_audio(file_path, segment_length=15601000): # 15 minutes in milliseconds
	# Load the audio file
	audio = AudioSegment.from_file(file_path)

	# Get the total length of the audio file
	total_length = len(audio)

	# Calculate the number of segments needed
	num_segments = math.ceil(total_length / segment_length)

	# Loop through and create each segment
	for i in range(num_segments):
	start_time = i * segment_length
	end_time = min((i + 1) * segment_length, total_length) # Ensure the last segment does not exceed total length
	segment = audio[start_time:end_time]

	# Generate the output file name
	output_file = f"{file_path[:-4]}_part{i+1}.m4a"

	# Export the segment as an m4a file
	segment.export(output_file, format="ipod") # see https://github.com/jiaaro/pydub/issues/755
	print(f"Exported: {output_file}")


	source_path = os.path.abspath(sys.argv[1])
	split_audio(source_path)
	"""
	This module reads audio files and transcribes them using OpenAI's API.

	Usage: python transcribe <input_wildcard>

	Where input_wildcard is a wildcard pattern that matches the audio files to be transcribed.

	For example if the audio files are named "test/audio1.m4a", "test/audio2.m4a", etc.,
	you can use the wildcard "test/audio*.m4a" to transcribe all the files.

	Each transcription is saved to a text file with the same name as the audio file, but
	with a .txt extension.

	All the text files are then combined into a single output file named "output_file.txt".

	Your environment must meet the following requirements:
	- OpenAI Python package installed
	- set OPENAI_API_KEY environment variable to your OpenAI API key


	"""
	import os
	import sys
	import glob
	import shutil
	from openai import OpenAI

	client = OpenAI()

	input_wildcard = sys.argv[1]
	input_files = glob.glob(input_wildcard)
	print(f"Transcribing from{input_files}")
	output_files = []

	for x in input_files:
	audio_file= open(x, "rb")
	print("Transcribing file " + x)
	transcription = client.audio.transcriptions.create(
	model="whisper-1",
	file=audio_file,
	response_format="text",
	language="en"
	)
	output_file = f"{x[:-4]}.txt"
	output_files.append(output_file)
	with open(output_file, "a", encoding="utf-8") as f:
	print(transcription, file=f)
	print(f"Transcription written to: {output_file}")

	print(f"Outputs are {output_files}")
	concat_file = os.path.dirname(output_files[0]) + "/output_file.txt"

	with open(concat_file,'wb') as wfd:
	for f in output_files:
	with open(f,'rb') as fd:
	shutil.copyfileobj(fd, wfd)