Skip to content

Instantly share code, notes, and snippets.

@bigsnarfdude
Last active February 8, 2024 22:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bigsnarfdude/c45591ab5f3a0b55a476019552c15614 to your computer and use it in GitHub Desktop.
Save bigsnarfdude/c45591ab5f3a0b55a476019552c15614 to your computer and use it in GitHub Desktop.
splytter.py audio splitter
from pydub import AudioSegment
from openai import OpenAI
def split_audio_into_chunks(filename, chunk_duration=600000):
    """Split an audio file into consecutive MP3 chunks written to disk.

    The file is loaded with ``AudioSegment.from_file`` and sliced into
    pieces of at most ``chunk_duration``. Each piece is exported as
    ``chunk_1.mp3``, ``chunk_2.mp3``, ... using relative filenames.

    Args:
        filename: Path of the media file to load.
        chunk_duration: Length of each chunk, in the same units pydub uses
            for ``len(audio)`` and slicing (milliseconds, so the default
            is 10 minutes). Must be greater than zero.

    Returns:
        The list of chunk filenames that were exported, in order. The list
        is empty when the loaded audio has zero length.

    Raises:
        ValueError: If ``chunk_duration`` is zero or negative.
    """
    # A non-positive step would never move `start` past the end of the
    # audio, so the loop below would spin forever writing files.
    if chunk_duration <= 0:
        raise ValueError(
            f'chunk_duration must be positive, got {chunk_duration!r}'
        )

    audio = AudioSegment.from_file(filename)
    total_length = len(audio)

    chunk_filenames = []
    start = 0
    while start < total_length:
        # Slicing past the end is fine: the final chunk is simply shorter.
        chunk = audio[start:start + chunk_duration]
        chunk_filename = f'chunk_{len(chunk_filenames) + 1}.mp3'
        chunk.export(chunk_filename, format='mp3')
        chunk_filenames.append(chunk_filename)
        start += chunk_duration
    return chunk_filenames
# Source media to transcribe; it is first split into chunk_N.mp3 files.
target_file = './files/2.mp4'
chunks = split_audio_into_chunks(target_file)

client = OpenAI()
# NOTE(review): placeholder API key hard-coded in source. A real key should
# come from the environment or a secret store, never from a committed file.
client.api_key = 'sk-superSecret'

# Transcribe the chunks in order so the output follows the recording.
transcriptions = []
for chunk_path in chunks:
    # `with` closes each chunk file as soon as its request has completed,
    # instead of leaking one open handle per chunk.
    with open(chunk_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
    transcriptions.append(transcription.text)

# Write one chunk transcription per line.
with open('transcription.txt', 'w', encoding='utf-8') as file:
    for transcription_text in transcriptions:
        file.write(transcription_text + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment