Skip to content

Instantly share code, notes, and snippets.

@ferrihydrite
Last active July 1, 2024 07:03
Show Gist options
  • Save ferrihydrite/e96dee177614898be250457cd592517a to your computer and use it in GitHub Desktop.
Save ferrihydrite/e96dee177614898be250457cd592517a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Usage:
morphagene_onset.py -w <inputwavfile> -o <outputfile>
Use the Superflux onset detection algorithm with backtracking to generate
splice locations.
Use these splice locations with a converted WAV (to 32-bit float / 48000Hz)
to make Morphagene reels.
This method typically generates a splice on each percussion hit of a sample,
so choose a sample of appropriate length, or you will quickly exceed the
Morphagene's limit of 300 splices per reel.
"""
import librosa
import sys, getopt, os
import struct
import numpy as np
from scipy import interpolate
import warnings
import collections
class WavFileWarning(UserWarning):
    """Warning category used when a WAV file has unfamiliar or skipped content."""
# Module-level flag: set to True by read()'s fmt-chunk parser when the format
# code is 3, and consulted by its data-chunk parser to pick float32 for
# 32-bit samples.
_ieee = False
def test_normalized(array):
    '''
    Determine whether every element of ``array`` lies strictly inside the
    open interval (-1, 1), i.e. whether the data already looks normalized.

    Parameters
    ----------
    array : array-like
        Sample data of any shape. Lists and tuples are accepted as well as
        numpy arrays.

    Returns
    -------
    bool
        True if -1 < x < 1 holds for every element (vacuously True for empty
        input), False otherwise.
    '''
    values = np.asarray(array)
    return bool(((values > -1) & (values < 1)).all())


# Despite its name this is a helper, not a unit test; stop pytest/nose from
# collecting it when the module is star-imported into a test file.
test_normalized.__test__ = False
def read(file, readmarkers=False, readmarkerlabels=False,
         readmarkerslist=False, readloops=False, readpitch=False,
         normalized=False, forcestereo=False):
    """
    Return the sample rate (in samples/sec), data and bit depth of a WAV
    file, optionally followed by cue markers, labels, loops and pitch.

    Parameters
    ----------
    file : str or file-like
        Filename, or an object with ``read``/``seek``/``tell`` positioned at
        the start of the RIFF header. The object is closed before returning.
    readmarkers : bool
        Append the list of cue positions (sorted ascending) to the result.
    readmarkerlabels : bool
        Append the list of cue labels (in cue-position order) to the result.
    readmarkerslist : bool
        Append the list of ``{'position': ..., 'label': ...}`` dicts.
    readloops : bool
        Append the list of ``[start, end]`` sample loops from the 'smpl' chunk.
    readpitch : bool
        Append the pitch in Hz derived from the 'smpl' chunk (0.0 if absent).
    normalized : bool
        Convert integer PCM data to float32 in the range [-1, 1). Floating
        point data is returned unchanged.
    forcestereo : bool
        Duplicate a mono signal into two identical columns.

    Returns
    -------
    tuple
        ``(rate, data, bits)`` followed by the optional items requested above,
        in the order markers, labels, markers list, loops, pitch.

    Raises
    ------
    ValueError
        If the RIFF/WAVE header is missing, or no 'fmt ' or 'data' chunk is
        found.

    Notes
    -----
    * The returned sample rate is a Python integer.
    * The data is returned as a np array with a data-type determined from
      the file; multi-channel data has shape (frames, channels).
    """
    global _ieee
    # Start from a clean state so a float file read earlier in this process
    # cannot change how an integer file is decoded now.
    _ieee = False

    # assumes file pointer is at the very start of the file
    def _read_riff_chunk(fid):
        if fid.read(4) != b'RIFF':
            raise ValueError("Not a WAV file.")
        fsize = struct.unpack('<I', fid.read(4))[0] + 8
        if fid.read(4) != b'WAVE':
            raise ValueError("Not a WAV file.")
        return fsize

    # assumes file pointer is immediately after the 'fmt ' id
    def _read_fmt_chunk(fid):
        size, comp, noc, rate, sbytes, ba, bits = struct.unpack(
            '<ihHIIHH', fid.read(20))
        # format 1 = integer PCM, format 3 = IEEE float; anything else
        # (or an extended header on a non-float file) is only warned about
        if (comp != 1 or size > 16) and comp != 3:
            warnings.warn("Unfamiliar format bytes", WavFileWarning)
        if size > 16:
            fid.read(size - 16)
        return size, comp, noc, rate, sbytes, ba, bits

    # assumes file pointer is immediately after the 'data' id
    def _read_data_chunk(fid, noc, bits, normalized=False, is_float=False):
        size = struct.unpack('<I', fid.read(4))[0]
        if bits in (8, 24):
            # 8-bit is unsigned; 24-bit has no native type and is assembled
            # from raw bytes below
            dtype = np.dtype('u1')
        elif is_float and bits in (32, 64):
            dtype = np.dtype('<f%d' % (bits // 8))
        else:
            dtype = np.dtype('<i%d' % (bits // 8))
        # read() + frombuffer works for any file-like object, not only real
        # files; a truncated chunk simply yields fewer samples
        raw = fid.read(size)
        data = np.frombuffer(raw, dtype=dtype,
                             count=len(raw) // dtype.itemsize)
        if bits == 24:
            # widen each 3-byte sample to 4 bytes, sign-extending the top byte
            nsamples = len(data) // 3
            widened = np.empty((nsamples, 4), dtype='u1')
            widened[:, :3] = data[:nsamples * 3].reshape((-1, 3))
            widened[:, 3:] = (widened[:, 2:3] >> 7) * 255
            data = widened.view('<i4').reshape(widened.shape[:-1])
        if noc > 1:
            # de-interleave into (frames, channels), dropping a partial frame
            nframes = len(data) // noc
            data = data[:nframes * noc].reshape(-1, noc)
        if size & 1:
            # chunks are word-aligned: skip the pad byte after an odd size
            fid.seek(1, 1)
        if normalized:
            if data.dtype.kind == 'f':
                # float WAVs are stored normalized; rescaling would destroy
                # them (a full-scale +/-1.0 sample must not trigger it)
                print('File already normalized, passing')
            elif bits == 8:
                # unsigned 8-bit: shift so that 128 maps to 0.0
                data = np.float32(data) / 128.0 - 1.0
            elif bits in (16, 24, 32):
                data = np.float32(data) / float(2 ** (bits - 1))
        if not data.flags.writeable:
            # frombuffer views are read-only; hand back a normal array
            data = data.copy()
        return data

    def _skip_unknown_chunk(fid):
        size = struct.unpack('<I', fid.read(4))[0]
        # chunks are word-aligned: an odd size is followed by one pad byte
        fid.seek(size + (size & 1), 1)

    fid = file if hasattr(file, 'read') else open(file, 'rb')
    try:
        fsize = _read_riff_chunk(fid)
        noc = 1
        bits = 8
        rate = None
        data = None
        is_float = False
        # keyed by cue id so labels and positions can be matched up
        markers = collections.defaultdict(
            lambda: {'position': -1, 'label': ''})
        loops = []
        pitch = 0.0
        while fid.tell() < fsize:
            chunk_id = fid.read(4)
            if len(chunk_id) < 4:
                # file is shorter than the RIFF header claims
                break
            if chunk_id == b'fmt ':
                size, comp, noc, rate, sbytes, ba, bits = _read_fmt_chunk(fid)
                is_float = (comp == 3)
                _ieee = is_float
            elif chunk_id == b'data':
                data = _read_data_chunk(fid, noc, bits, normalized, is_float)
            elif chunk_id == b'cue ':
                size, numcue = struct.unpack('<ii', fid.read(8))
                for _ in range(numcue):
                    # id, position, then 16 bytes (data chunk id, chunk
                    # start, block start, sample offset) that are not used
                    cue_id, position = struct.unpack('<ii16x', fid.read(24))
                    markers[cue_id]['position'] = position
            elif chunk_id == b'LIST':
                # consume only the size and list type; the sub-chunks that
                # follow are then parsed by this same loop
                fid.read(8)
            elif chunk_id in (b'ICRD', b'IENG', b'ISFT', b'ISTJ'):
                # see http://www.pjb.com.au/midi/sfspec21.html#i5
                _skip_unknown_chunk(fid)
            elif chunk_id == b'labl':
                size, cue_id = struct.unpack('<ii', fid.read(8))
                # the size should be even, see WAV specification,
                # e.g. 16=>16, 23=>24
                size += size % 2
                # remove the trailing null characters
                label = fid.read(size - 4).rstrip(b'\x00')
                if not isinstance(label, str):
                    # Python 3: hand back text, as Python 2 callers got
                    label = label.decode('utf-8', 'replace')
                markers[cue_id]['label'] = label
            elif chunk_id == b'smpl':
                # skip size/manufacturer/product/sample period (16 bytes),
                # take unity note and pitch fraction, skip the two SMPTE
                # fields, take the loop count, skip the sampler-data size
                midiunitynote, midipitchfraction, numsampleloops = \
                    struct.unpack('<16xiI8xi4x', fid.read(40))
                cents = midipitchfraction * 1. / (2 ** 32 - 1)
                pitch = 440. * 2 ** ((midiunitynote + cents - 69.) / 12)
                for _ in range(numsampleloops):
                    # skip cue id and type, take start/end, skip
                    # fraction and play count
                    start, end = struct.unpack('<8xii8x', fid.read(24))
                    loops.append([start, end])
            else:
                warnings.warn(
                    "Chunk " + chunk_id.decode('latin-1') + " skipped",
                    WavFileWarning)
                _skip_unknown_chunk(fid)
    finally:
        fid.close()

    if rate is None or data is None:
        raise ValueError("WAV file is missing a 'fmt ' or 'data' chunk.")
    if data.ndim == 1 and forcestereo:
        data = np.column_stack((data, data))

    # sort by position
    markerslist = sorted(markers.values(), key=lambda m: m['position'])
    cue = [m['position'] for m in markerslist]
    cuelabels = [m['label'] for m in markerslist]
    return (rate, data, bits, ) \
        + ((cue,) if readmarkers else ()) \
        + ((cuelabels,) if readmarkerlabels else ()) \
        + ((markerslist,) if readmarkerslist else ()) \
        + ((loops,) if readloops else ()) \
        + ((pitch,) if readpitch else ())
def float32_wav_file(file_name, sample_array, sample_rate,
                     markers=None, verbose=False):
    """
    Write a 32-bit IEEE float WAV file, optionally with cue markers.

    Parameters
    ----------
    file_name : str
        Path of the file to create (overwritten if it exists).
    sample_array : array-like
        Audio with shape (channels, frames). A 1-D array is treated as a
        single channel.
    sample_rate : int
        Sample rate in Hz written to the header.
    markers : sequence, optional
        Either cue positions in frames, or dicts of the form
        ``{'position': 100, 'label': 'marker1'}``. Written as a 'cue ' chunk
        followed by a 'LIST'/'adtl' chunk holding one 'labl' per marker.
    verbose : bool
        Print progress messages.

    Raises
    ------
    ValueError
        If ``sample_array`` has more than two dimensions.
    """
    samples = np.atleast_2d(np.asarray(sample_array))
    if samples.ndim != 2:
        raise ValueError("sample_array must have shape (channels, frames)")
    n_channels = samples.shape[0]
    sample_rate = int(sample_rate)
    block_align = 4 * n_channels  # bytes per frame: one float32 per channel

    if verbose:
        print("packing data...")
    # WAV stores frames interleaved (ch0, ch1, ch0, ch1, ...), which is the
    # C-order flattening of the (frames, channels) transpose
    # see: http://soundfile.sapp.org/doc/WaveFormat/
    payload = np.ascontiguousarray(samples.T, dtype='<f4').tobytes()
    byte_count = len(payload)

    # Build the marker chunks before the header so the RIFF size can
    # account for them.
    trailer = b''
    if markers is not None and len(markers) > 0:
        if isinstance(markers[0], dict):
            # [{'position': 100, 'label': 'marker1'}, ...]
            labels = [m['label'] for m in markers]
            positions = [m['position'] for m in markers]
        else:
            labels = ['' for _ in markers]
            positions = list(markers)
        pieces = [b'cue ',
                  struct.pack('<ii', 4 + len(positions) * 24, len(positions))]
        for idx, pos in enumerate(positions):
            pos = int(pos)
            # cue id, position, 'data' chunk id, chunk start, block start,
            # sample offset
            pieces.append(
                struct.pack('<ii4siii', idx + 1, pos, b'data', 0, 0, pos))
        labl_parts = []
        for idx, lbl in enumerate(labels):
            text = lbl if isinstance(lbl, bytes) else lbl.encode('utf-8')
            # null-terminate, then pad so the chunk ends on an even byte
            padded = text + (b'\x00' if len(text) % 2 == 1 else b'\x00\x00')
            # declared size = cue id (4) + text + terminating null
            labl_parts.append(
                b'labl' + struct.pack('<ii', len(text) + 1 + 4, idx + 1)
                + padded)
        lbls = b''.join(labl_parts)
        # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
        pieces.extend([b'LIST', struct.pack('<i', len(lbls) + 4),
                       b'adtl', lbls])
        trailer = b''.join(pieces)

    header = struct.pack(
        '<4sI4s4sIHHIIHH4sI',
        b'RIFF',
        36 + byte_count + len(trailer),  # everything after this size field
        b'WAVE',
        b'fmt ',
        0x10,                            # size of 'fmt ' body
        3,                               # format 3 = floating-point PCM
        n_channels,                      # channels
        sample_rate,                     # samples / second
        sample_rate * block_align,       # bytes / second
        block_align,                     # block alignment
        32,                              # bits / sample
        b'data',
        byte_count)

    with open(file_name, 'wb') as fid:
        if verbose:
            print("saving audio...")
        fid.write(header)
        fid.write(payload)
        if trailer:
            if verbose:
                print("saving cue markers...")
            fid.write(trailer)
def onset_splice_superflux(audiofile):
    '''
    Superflux onset detection method of Boeck and Widmer [2013], modified to
    use backtracking to get accurate splice location.
    From:
    https://librosa.github.io/librosa/auto_examples/plot_superflux.html#sphx-glr-auto-examples-plot-superflux-py

    Parameters
    ----------
    audiofile : str
        Path of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    The result of ``librosa.onset.onset_detect`` with ``units='time'`` and
    ``backtrack=True`` (onset locations expressed in time units).
    '''
    # audio is resampled to 44.1 kHz on load, independent of the file's rate
    y, sr = librosa.load(audiofile, sr=44100)
    # Constants directly from paper
    n_fft = 1024
    hop_length = int(librosa.time_to_samples(1. / 200, sr=sr))
    lag = 2          # number of frames
    n_mels = 138     # number of bins
    fmin = 27.5      # lowest frequency
    fmax = 16000.    # highest frequency
    max_size = 3
    # Mel spectrogram; the audio is passed by keyword because recent librosa
    # releases no longer accept it positionally
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft,
                                       hop_length=hop_length,
                                       fmin=fmin,
                                       fmax=fmax,
                                       n_mels=n_mels)
    # Onset Strength Function
    odf_sf = librosa.onset.onset_strength(
        S=librosa.power_to_db(S, ref=np.max),
        sr=sr,
        hop_length=hop_length,
        lag=lag, max_size=max_size)
    # Onset locations in time
    onset_sf = librosa.onset.onset_detect(onset_envelope=odf_sf,
                                          sr=sr,
                                          hop_length=hop_length,
                                          units='time',
                                          backtrack=True)
    return onset_sf
def change_samplerate_interp(old_audio, old_rate, new_rate):
    '''
    Change sample rate to new sample rate by simple (linear) interpolation.
    If old_rate > new_rate, there may be aliasing / data loss.
    Input should be in column format (frames along the first axis), as the
    interpolation will be completed on each channel this way.
    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate

    Parameters
    ----------
    old_audio : np array
        Audio of shape (frames,) or (frames, channels).
    old_rate, new_rate : int or float
        Current and desired sample rates.

    Returns
    -------
    np array
        Audio with ``int(frames * new_rate / old_rate)`` frames. When the
        rates already match, ``old_audio`` itself is returned.
    '''
    if old_rate == new_rate:
        print('Conversion not needed, old and new rates match')
        return old_audio  # conversion not needed

    old_audio = np.asarray(old_audio)
    n_old = old_audio.shape[0]
    # force float arithmetic so integer rates do not truncate on Python 2
    n_new = int(n_old * float(new_rate) / float(old_rate))
    if n_old < 2:
        # interp1d needs at least two points; hold the only sample (or
        # return an empty array when there is none)
        return np.repeat(old_audio, n_new, axis=0)

    # duration of audio
    duration = n_old / float(old_rate)
    # time axes of old and new audio, sharing the same end points
    time_old = np.linspace(0, duration, n_old)
    time_new = np.linspace(0, duration, n_new)
    # fit old_audio into new_audio length by interpolation
    interpolator = interpolate.interp1d(time_old, old_audio.T)
    return interpolator(time_new).T
def main(argv):
    """
    Command-line entry point: build a Morphagene reel from a WAV file.

    Writes a single 32-bit float / 48000Hz WAV file with splice (cue)
    locations from the Superflux onset detection algorithm with backtracking,
    plus a text file of the onset times next to the input file.

    Parameters
    ----------
    argv : list of str
        Command-line arguments without the program name:
        ``-w <inputwavfile> -o <outputfile>`` (or ``-h`` for help).

    Raises
    ------
    SystemExit
        With status 2 on a usage error, 1 if the input WAV cannot be read,
        and 0 after printing help.
    ValueError
        If the result exceeds the Morphagene limits (300 splices or
        2.9 minutes).
    """
    usage = 'morphagene_onset.py -w <inputwavfile> -o <outputfile>'
    inputwavefile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hw:o:", ["wavfile=", "outputfile="])
    except getopt.GetoptError:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Morphagene reel creation using Superflux onset detection:\n'
                  + usage)
            sys.exit()
        elif opt in ("-w", "--wavfile"):
            inputwavefile = arg
        elif opt in ("-o", "--outputfile"):
            outputfile = arg
    if not inputwavefile or not outputfile:
        # both files are required; fail here rather than deep inside librosa
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    print('Input wave file: %s' % inputwavefile)
    print('Output Morphagene reel: %s' % outputfile)

    morph_srate = 48000  # required samplerate for Morphagene
    # time in seconds of splices using librosa
    librosa_sec = np.unique(onset_splice_superflux(inputwavefile))

    # read pertinent info from audio file, exit if input wave file is broken
    try:
        # forcestereo gives mono input the 2-D (frames, channels) layout the
        # rest of this function relies on
        (sample_rate, array, bits, ) = read(inputwavefile, normalized=True,
                                            forcestereo=True)
    except Exception:
        print('Input .wav file %s is poorly formatted, exiting'
              % inputwavefile)
        sys.exit(1)

    # check if input wav has a different rate than desired Morphagene rate,
    # and correct by interpolation
    if sample_rate != morph_srate:
        print("Correcting input sample rate %iHz to Morphagene rate %iHz"
              % (sample_rate, morph_srate))
        # perform interpolation on each channel, then transpose back
        array = change_samplerate_interp(array, float(sample_rate),
                                         float(morph_srate)).T
    else:
        array = array.T
    # onset times are in seconds, so the frame index in the output file only
    # depends on the output rate (np.int no longer exists in current NumPy)
    frame_labs = (librosa_sec * morph_srate).astype(int)
    frame_dict = [{'position': int(l), 'label': 'marker%i' % (i + 1)}
                  for i, l in enumerate(frame_labs)]

    minutes = array.shape[1] / float(morph_srate) / 60.
    if len(frame_dict) > 300 or minutes > 2.9:
        raise ValueError(
            'Number of splices (%i) and/or audio' % len(frame_dict) +
            ' length (%2.1f minutes)' % minutes +
            ' exceed Morphagene limits [300 splices / 2.9 minutes]')

    # write wav file with additional cue markers from labels
    float32_wav_file(outputfile, array, morph_srate, markers=frame_dict)
    print('Saved Morphagene reel with %i splices: %s'
          % (len(frame_labs), outputfile))
    # also save the onset times (seconds) next to the input file
    name = os.path.splitext(inputwavefile)[0]
    np.savetxt('%s.txt' % name, librosa_sec, fmt='%03.6f', delimiter='\t')
if __name__ == "__main__":
    # Executed as a script: hand every argument after the program name to main
    main(argv=sys.argv[1:])
Copy link

ghost commented Mar 18, 2021

Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.

I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!

Hey! Any news on the conversion to python3?

@ferrihydrite
Copy link
Author

Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.

I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!

Hey! Any news on the conversion to python3?

Hey! just got a new one up (check my other files). I've tested it on mac and windows and on several wav files and it seems to work.

Copy link

ghost commented Mar 21, 2021

Will there be an update to this script by chance? It seems to have stopped working with librosa no matter what version I roll back to.

I'm currently in the process of converting this to Python3, and while there are a few issues with writing the labels, librosa 0.7.2 works for me. Keep your eye out for the new version soon!

Hey! Any news on the conversion to python3?

Hey! just got a new one up (check my other files). I've tested it on mac and windows and on several wav files and it seems to work.

Amazing! Thanks so much, been looking forward to this for a while.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment