Skip to content

Instantly share code, notes, and snippets.

@asqd
Last active November 23, 2022 09:08
Show Gist options
  • Save asqd/5adeb37564cb2ff4ff8d890df01afa1a to your computer and use it in GitHub Desktop.
Save asqd/5adeb37564cb2ff4ff8d890df01afa1a to your computer and use it in GitHub Desktop.
Python script to slice a large MP3 file into chunks
from os import path
from pydub import AudioSegment
import sys
# pydub uses ffmpeg for file manipulation,
# so don't forget to install ffmpeg.
# sys.path only influences Python module imports, so adding the binary there
# cannot help pydub find ffmpeg; hand the binary to pydub directly instead.
# When the file is not present, pydub's default converter setting is kept.
FFMPEG_BINARY = '/usr/local/bin/ffmpeg'
if path.isfile(FFMPEG_BINARY):
    AudioSegment.converter = FFMPEG_BINARY
def load_songs(fn):
    """Parse a track-list schema file into a list of song dicts.

    The file is read line by line.  A line whose first non-blank character
    is a digit is handed to ``parse_song``; any other non-blank line is
    handed to ``parse_meta`` and, when it yields a non-empty value, stored
    under the lower-cased key.  Metadata collected so far is copied into
    every song that follows it.

    Each song dict gets a 1-based ``'track'`` number (as a string), and every
    song except the last gets a ``'time_end'`` equal to the next song's
    ``'time_start'``.

    Args:
        fn: Path of the schema file, passed to ``open_file``.

    Returns:
        List of song dicts in file order.
    """
    meta = {}
    songs = []
    # The context manager closes the handle even if parsing fails part-way.
    with open_file(fn) as schema_file:
        for raw_line in schema_file:
            line = raw_line.strip()
            if not line:
                continue
            if line[0].isdigit():
                # Song fields are applied last so that a stray "Title:"
                # metadata line can never overwrite the per-track title.
                song = dict(meta)
                song.update(parse_song(line))
                song['track'] = str(len(songs) + 1)
                if songs:
                    # The previous track ends where this one begins.
                    songs[-1]['time_end'] = song['time_start']
                songs.append(song)
                continue
            parts = parse_meta(line)
            if len(parts) < 2:
                # No "key: value" separator -- skip the line, don't crash.
                continue
            key, value = parts[0].strip(), parts[1].strip()
            if value:
                meta[key.lower()] = value
    return songs
def open_file(fn):
    """Open the schema file *fn* for reading as UTF-8 text.

    The encoding is given explicitly so that non-ASCII artist or title names
    are decoded the same way on every platform instead of depending on the
    locale's default encoding.

    Args:
        fn: Path of the file to open.

    Returns:
        An open text-mode file object; the caller is responsible for
        closing it.
    """
    return open(fn, encoding='utf-8')
# returns dict song
def parse_song(line):
    """Split a ``<time> - <title>`` line into a song dict.

    The line is cut at its first ``-`` only, so dashes inside the title are
    preserved.  Both halves have surrounding whitespace removed.

    Args:
        line: One track line from the schema file.

    Returns:
        Dict with the keys ``'time_start'`` and ``'title'``.

    Raises:
        ValueError: If the line contains no ``-`` (unpacking fails).
    """
    pieces = line.split('-', 1)
    time, title = (piece.strip() for piece in pieces)
    return dict(time_start=time, title=title)
# returns meta key, value
# Example: 'artist', 'Aerosmith'
def parse_meta(line):
    """Split a ``key: value`` metadata line at its first colon.

    Always returns exactly two items so callers can unpack the result
    safely; a line without a colon yields an empty value.  Both items have
    surrounding whitespace removed, so ``"Artist: Aerosmith"`` gives
    ``"Aerosmith"`` rather than ``" Aerosmith"``.

    Args:
        line: One metadata line from the schema file.

    Returns:
        A two-element list ``[key, value]``.
    """
    key, _, value = line.partition(':')
    return [key.strip(), value.strip()]
def time_to_ms(time):
    """Convert a ``[[HH:]MM:]SS`` timestamp to milliseconds.

    Components are matched from the right, so ``"15"`` is 15 seconds,
    ``"03:03"`` is 3 minutes 3 seconds and ``"1:02:03"`` is 1 hour 2 minutes
    3 seconds.  An ``int`` is assumed to already be in milliseconds and is
    returned unchanged.

    Args:
        time: Timestamp string, or an int number of milliseconds.

    Returns:
        The time as an int number of milliseconds.

    Raises:
        ValueError: If there are more than three components or a component
            is not an integer.
    """
    if isinstance(time, int):
        return time

    parts = time.strip().split(':')
    if len(parts) > 3:
        raise ValueError("too many ':' separated fields in time %r" % (time,))

    # Milliseconds per seconds / minutes / hours, least significant first so
    # they line up with the reversed components.
    unit_ms = (1000, 60000, 3600000)
    return sum(int(part) * unit for part, unit in zip(reversed(parts), unit_ms))
def extract_song(source, song_data, output_path):
    """Cut one track out of *source* and export it as an MP3 file.

    The slice runs from the song's ``'time_start'`` to its ``'time_end'``;
    when ``'time_end'`` is missing or empty, ``len(source)`` is used instead.
    Both bounds go through ``time_to_ms`` before slicing.  The file name
    comes from ``compose_filename`` and the tags from ``extract_tags``.

    Args:
        source: Sliceable audio object whose slices offer ``export``
            (the script passes a pydub ``AudioSegment``).
        song_data: Song dict with at least ``'time_start'``, ``'title'``
            and ``'track'``.
        output_path: Directory the exported file is written into.
    """
    start = song_data['time_start']
    end = song_data.get('time_end')
    if not end:
        # Last track: run to the end of the recording.
        end = len(source)
    tags = extract_tags(song_data)

    print("extracting %s \n" % song_data['title'])

    start_ms = time_to_ms(start)
    end_ms = time_to_ms(end)
    clip = source[start_ms:end_ms]
    target = path.join(output_path, compose_filename(song_data))
    clip.export(target, format="mp3", tags=tags)
def extract_tags(song_data):
    """Pick the tag fields out of a song dict.

    Only ``title``, ``album``, ``artist``, ``year`` and ``track`` are
    considered; fields absent from *song_data* are left out of the result.

    Args:
        song_data: Song dict as built by ``load_songs``.

    Returns:
        New dict holding the tag fields that were present.
    """
    wanted = ('title', 'album', 'artist', 'year', 'track')
    found = {}
    for name in wanted:
        if name in song_data:
            found[name] = song_data[name]
    return found
def compose_filename(song_data):
    """Build the output file name ``"NN - <title>.mp3"`` for a song.

    The track number is zero-padded to two digits and every ``/`` in the
    resulting name is replaced by ``|`` so the title cannot introduce a
    path separator.

    Args:
        song_data: Song dict with string values for ``'track'`` and
            ``'title'``.

    Returns:
        The file name as a string.
    """
    number = song_data['track'].zfill(2)
    raw_name = "".join([number, " - ", song_data['title'], ".mp3"])
    return '|'.join(raw_name.split("/"))
# Placeholders: replace with the large MP3 to split and the directory that
# receives the extracted tracks.
fn = path.normpath('path_to_large_mp3')
output_dir = path.normpath('output_path')

##
# Schema looks like
#
# Artist: Panic! at the Disco
# Album: Pray for the Wicked Tour 2019 - Live at O2 Arena, London 2019
# Year: 2019
# 00:15 - (Fuck A) Silver Lining
# 03:03 - Don't Threaten Me With a Good Time
# 06:34 - Ready to Go (Get Me Out of My Mind)
# ...
##
# Placeholder: replace with the path of the schema (track list) file.
schema = 'schema_path'

# Run the extraction only when executed as a script, so importing this
# module for its helper functions does not start decoding audio.
if __name__ == '__main__':
    print("reading file \n")
    load_songs_schema = load_songs(schema)
    record = AudioSegment.from_mp3(fn)
    for song_data in load_songs_schema:
        extract_song(record, song_data, output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment