Skip to content

Instantly share code, notes, and snippets.

@knandersen
Last active May 12, 2021 03:16
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save knandersen/0f41fc132e584bc66958adca590aed12 to your computer and use it in GitHub Desktop.
Save knandersen/0f41fc132e584bc66958adca590aed12 to your computer and use it in GitHub Desktop.
Takes an input directory containing wave files and splices them together into a morphagene reel
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
USAGE:
morphagene_directory.py -d <inputdirectory> -o <outputfile>
Script will go through <inputdirectory> and splice all wave-files
together in alphabetic order and place a marker in between.
Requires all wave-files in directory to have the same number of channels and bit depth (bits per sample)!
Does not require input file to be 48000Hz, but .WAV-files must be stereo.
See the Morphagene manual for naming conventions of output files:
http://www.makenoisemusic.com/content/manuals/morphagene-manual.pdf
# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python
# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script
# marker code from Joseph Basquin [https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476]
"""
import sys, getopt
import struct
import numpy as np
from scipy import interpolate
import gzip
import os
def float32_wav_file(file_name, sample_array, sample_rate,
                     markers=None, verbose=False):
    """Write audio to a 32-bit floating-point WAV file, optionally with cues.

    Args:
        file_name: Path of the WAV file to create (overwritten if present).
        sample_array: Array of samples shaped (channels, frames). A 1-D
            array is treated as a single channel.
        sample_rate: Sample rate in Hz, written to the 'fmt ' chunk.
        markers: Optional sequence of cue positions (sample frame indices),
            or a sequence of dicts shaped like
            ``{'position': 100, 'label': 'marker1'}``. When given and
            non-empty, a 'cue ' chunk and a 'LIST'/'adtl' chunk holding one
            'labl' entry per marker are appended after the audio data.
        verbose: When true, print progress messages.

    Raises:
        ValueError: If ``sample_array`` has more than two dimensions.
    """
    samples = np.atleast_2d(np.asarray(sample_array))
    if samples.ndim != 2:
        raise ValueError('sample_array must be 1-D or 2-D (channels, frames)')
    num_channels, _num_frames = samples.shape
    sample_rate = int(sample_rate)
    bytes_per_sample = 4  # IEEE 32-bit float
    # One block holds one sample for every channel.
    block_align = num_channels * bytes_per_sample

    if verbose:
        print("packing...")
    # Transposing to (frames, channels) and serialising in C order yields
    # the interleaved layout (ch0, ch1, ..., ch0, ch1, ...) WAV expects.
    # see: http://soundfile.sapp.org/doc/WaveFormat/
    audio_bytes = np.ascontiguousarray(samples.T, dtype='<f4').tobytes()

    fmt_chunk = b'fmt ' + struct.pack(
        '<IHHIIHH',
        0x10,                      # size of 'fmt ' body
        3,                         # format 3 = floating-point PCM
        num_channels,              # channels
        sample_rate,               # samples / second
        sample_rate * block_align, # bytes / second
        block_align,               # block alignment
        32)                        # bits / sample
    data_chunk = b'data' + struct.pack('<I', len(audio_bytes)) + audio_bytes

    # Build the optional marker chunks up front so the RIFF size field can
    # cover everything that ends up in the file.
    marker_chunks = []
    if markers is not None and len(markers) > 0:
        if isinstance(markers[0], dict):
            # then we have [{'position': 100, 'label': 'marker1'}, ...]
            labels = [m['label'] for m in markers]
            positions = [m['position'] for m in markers]
        else:
            labels = ['' for _ in markers]
            positions = list(markers)

        cue_body = struct.pack('<i', len(positions))
        for index, position in enumerate(positions):
            position = int(position)
            # 1635017060 is struct.unpack('<i', b'data')
            cue_body += struct.pack('<iiiiii', index + 1, position,
                                    1635017060, 0, 0, position)
        marker_chunks.append(
            b'cue ' + struct.pack('<i', len(cue_body)) + cue_body)

        label_parts = []
        for index, label in enumerate(labels):
            if not isinstance(label, bytes):
                label = label.encode('utf-8')
            # NUL-terminate, padding so the chunk ends on an even boundary.
            padded = label + (b'\x00' if len(label) % 2 == 1 else b'\x00\x00')
            size = len(label) + 1 + 4  # text + terminating NUL + cue id
            label_parts.append(
                b'labl' + struct.pack('<ii', size, index + 1) + padded)
        label_bytes = b''.join(label_parts)
        # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
        marker_chunks.append(
            b'LIST' + struct.pack('<i', len(label_bytes) + 4)
            + b'adtl' + label_bytes)

    # RIFF size = everything after the 8-byte 'RIFF'<size> prefix.
    riff_size = (4 + len(fmt_chunk) + len(data_chunk)
                 + sum(len(chunk) for chunk in marker_chunks))

    if verbose:
        print("saving audio...")
    with open(file_name, 'wb') as fid:
        fid.write(b'RIFF' + struct.pack('<I', riff_size) + b'WAVE')
        fid.write(fmt_chunk)
        fid.write(data_chunk)
        if marker_chunks:
            if verbose:
                print("saving cue markers...")
            for chunk in marker_chunks:
                fid.write(chunk)
def wav_file_read(filename, verbose=False):
    """Read a RIFF/WAVE file and return its audio as floating-point samples.

    The file is parsed chunk by chunk, so chunks located before or after
    the audio data (for example 'LIST' or 'cue ') are skipped rather than
    being decoded as samples.

    Supported sample formats: integer PCM at 8, 16, 24 or 32 bits and
    IEEE float at 32 or 64 bits (also when wrapped in the extensible
    format tag 0xFFFE).

    Args:
        filename: Path of the WAV file to read.
        verbose: When true, print the parsed header fields.

    Returns:
        Tuple ``(samples, SampleRate, NumChannels, BitsPerSample)``.
        ``samples`` is a float64 array shaped (channels, frames); for a
        single-channel file it is 1-D with one entry per frame. Integer
        PCM is scaled by its largest positive value (127, 32767, 8388607,
        2147483647); 8-bit samples are unsigned in the file and are
        centred on 128 first. Float data is returned unscaled.

    Raises:
        ValueError: If the file is not RIFF/WAVE, lacks a usable 'fmt ' or
            'data' chunk, or uses an unsupported sample format.
    """
    with open(filename, 'rb') as fi:
        data = fi.read()

    if len(data) < 12 or data[0:4] != b'RIFF' or data[8:12] != b'WAVE':
        raise ValueError('%s is not a RIFF/WAVE file' % filename)
    ChunkSize = struct.unpack('<I', data[4:8])[0]

    # Walk the chunk list until both the format and the audio data are found.
    fmt_body = None
    waveData = None
    SubChunk2Size = 0
    offset = 12
    while offset + 8 <= len(data) and (fmt_body is None or waveData is None):
        chunk_id = data[offset:offset + 4]
        chunk_size = struct.unpack('<I', data[offset + 4:offset + 8])[0]
        body_start = offset + 8
        if chunk_id == b'fmt ' and fmt_body is None:
            fmt_body = data[body_start:body_start + chunk_size]
        elif chunk_id == b'data' and waveData is None:
            # Slicing clamps to the bytes actually present in the file.
            waveData = data[body_start:body_start + chunk_size]
            SubChunk2Size = chunk_size
        # Chunk bodies are padded to an even number of bytes.
        offset = body_start + chunk_size + (chunk_size & 1)

    if fmt_body is None or len(fmt_body) < 16:
        raise ValueError('%s has no usable fmt chunk' % filename)
    if waveData is None:
        raise ValueError('%s has no data chunk' % filename)

    Subchunk1Size = len(fmt_body)
    (AudioFormat, NumChannels, SampleRate,
     ByteRate, BlockAlign, BitsPerSample) = struct.unpack('<HHIIHH',
                                                          fmt_body[:16])
    if AudioFormat == 0xFFFE and len(fmt_body) >= 26:
        # Extensible format: the real format code is the first two bytes
        # of the SubFormat GUID, which starts at byte 24 of the fmt body.
        AudioFormat = struct.unpack('<H', fmt_body[24:26])[0]

    if verbose:
        print("ChunkSize =%d\nSubchunk1Size =%d\nAudioFormat =%d\nNumChannels =%d\nSampleRate =%d\nByteRate =%d\nBlockAlign =%d\nBitsPerSample =%d\nA:%c, B:%c, C:%c, D:%c\nSubChunk2Size =%d" %
              (ChunkSize,
               Subchunk1Size,
               AudioFormat,
               NumChannels,
               SampleRate,
               ByteRate,
               BlockAlign,
               BitsPerSample,
               'd', 'a', 't', 'a',
               SubChunk2Size))

    if NumChannels < 1:
        raise ValueError('%s declares no audio channels' % filename)
    if AudioFormat == 3:
        supported_bits = (32, 64)
    elif AudioFormat == 1:
        supported_bits = (8, 16, 24, 32)
    else:
        raise ValueError('%s uses unsupported audio format %d'
                         % (filename, AudioFormat))
    if BitsPerSample not in supported_bits:
        raise ValueError('%s uses unsupported bit depth %d'
                         % (filename, BitsPerSample))

    # Drop any incomplete trailing frame so the data splits evenly.
    frame_bytes = (BitsPerSample // 8) * NumChannels
    waveData = waveData[:len(waveData) - len(waveData) % frame_bytes]

    if verbose:
        print("Unpacking %d bits on len(waveData)=%d"
              % (BitsPerSample, len(waveData)))

    # convert audio data to float based on format and bit depth
    if not waveData:
        floatdata = np.zeros(0, dtype=np.float64)
    elif AudioFormat == 3:
        float_dtype = '<f4' if BitsPerSample == 32 else '<f8'
        floatdata = np.frombuffer(waveData, dtype=float_dtype).astype(np.float64)
    elif BitsPerSample == 8:
        # 8-bit WAV samples are unsigned with silence at 128.
        d = np.frombuffer(waveData, dtype=np.uint8)
        floatdata = (d.astype(np.float64) - 128.0) / 127.0
    elif BitsPerSample == 16:
        d = np.frombuffer(waveData, dtype='<i2')
        floatdata = d.astype(np.float64) / 32767.0
    elif BitsPerSample == 24:
        # Assemble three little-endian bytes per sample, then sign-extend.
        b = np.frombuffer(waveData, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        d = b[:, 0] | (b[:, 1] << 8) | (b[:, 2] << 16)
        d = d - ((d & 0x800000) << 1)
        floatdata = d.astype(np.float64) / 8388607.0
    else:  # 32-bit integer PCM
        d = np.frombuffer(waveData, dtype='<i4')
        floatdata = d.astype(np.float64) / 2147483647.0

    # De-interleave: one row per channel (a mono file stays 1-D).
    if NumChannels == 1:
        v = floatdata
    else:
        v = np.ascontiguousarray(floatdata.reshape(-1, NumChannels).T)
    return (v, SampleRate, NumChannels, BitsPerSample)
def change_samplerate_interp(old_audio, old_rate, new_rate):
    '''
    Change sample rate to new sample rate by simple interpolation.
    If old_rate > new_rate, there may be aliasing / data loss.

    Input should be in column format (one row per sample frame, one
    column per channel), as the interpolation is carried out along the
    first axis for each channel.

    Args:
        old_audio: Array of samples; axis 0 is time.
        old_rate: Sample rate of ``old_audio``.
        new_rate: Desired sample rate.

    Returns:
        The resampled array with ``int(frames * new_rate / old_rate)``
        rows, or ``old_audio`` itself (unchanged, not copied) when the two
        rates are equal.

    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
    '''
    if old_rate == new_rate:
        print('Conversion not needed, old and new rates match')
        return old_audio  # conversion not needed

    frames_old = old_audio.shape[0]
    # Force true division: with integer rates, Python 2 would otherwise
    # truncate the duration (to 0 for clips shorter than one second).
    duration = frames_old / float(old_rate)
    frames_new = int(frames_old * float(new_rate) / float(old_rate))

    # Both time axes span the same duration; only the point count differs.
    time_old = np.linspace(0, duration, frames_old)
    time_new = np.linspace(0, duration, frames_new)

    # interp1d works along the last axis, hence the transposes.
    interpolator = interpolate.interp1d(time_old, old_audio.T)
    return interpolator(time_new).T
def main(argv):
    """Splice every .wav file of a directory into one Morphagene reel.

    Parses ``-d/--inputdirectory`` and ``-o/--outputfile`` from ``argv``,
    reads the directory's .wav files in sorted order, resamples each one
    to 48000 Hz when its rate differs, joins them end to end and writes
    the result as a 32-bit float WAV file with a cue marker at every
    boundary between two input files.

    Args:
        argv: Command-line arguments without the program name.

    Exits (via ``sys.exit``) with status 2 on usage errors and status 1
    when the input directory or its files cannot be used.
    """
    usage = 'morphagene_directory.py -d <inputdirectory> -o <outputfile>'
    inputdirectory = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hd:o:",
                                   ["inputdirectory=", "outputfile="])
    except getopt.GetoptError:
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-d", "--inputdirectory"):
            inputdirectory = arg
        elif opt in ("-o", "--outputfile"):
            outputfile = arg
    if not inputdirectory or not outputfile:
        # Both options are required; fail early instead of in os.listdir('').
        print('Error in usage, correct format:\n' + usage)
        sys.exit(2)

    print('Input directory: %s' % inputdirectory)
    print('Output Morphagene reel: %s' % outputfile)

    # Write single file, to Morphagene 32bit WAV file at 48000hz sample rate.
    morph_srate = 48000  # required samplerate for Morphagene

    # find .wav-files in inputdirectory (extension matched case-insensitively)
    if not os.path.isdir(inputdirectory):
        print('Input directory %s does not exist' % inputdirectory)
        sys.exit(1)
    wavfiles = [os.path.join(inputdirectory, entry)
                for entry in sorted(os.listdir(inputdirectory))
                if entry.lower().endswith('.wav')]
    if not wavfiles:
        print('No .wav files found in %s' % inputdirectory)
        sys.exit(1)

    first_num_channels = None
    first_bits_per_sample = None
    segments = []      # one (channels, frames) array per input file
    markers = []       # running end position of each file, in frames
    total_frames = 0
    for wavfile in wavfiles:
        # read pertinent info from audio file, exit if input wave file is broken
        try:
            (array, sample_rate, num_channels, bits_per_sample) = \
                wav_file_read(wavfile, verbose=False)
        except Exception:
            print('Input .wav file %s is poorly formatted, exiting' % wavfile)
            sys.exit(1)

        # Checked outside the try block so this exit is not mistaken for
        # a read failure.
        if first_num_channels is None:
            first_num_channels = num_channels
            first_bits_per_sample = bits_per_sample
        elif (num_channels != first_num_channels
              or bits_per_sample != first_bits_per_sample):
            print('All wave files must have the same number of channels and bitrate')
            sys.exit(1)

        # Guarantee a (channels, frames) layout even for 1-D input.
        array = np.atleast_2d(array)

        # check if input wav has a different rate than desired Morphagene rate,
        # and correct by interpolation
        if sample_rate != morph_srate:
            print("Correcting input sample rate %iHz to Morphagene rate %iHz"
                  % (sample_rate, morph_srate))
            # perform interpolation on each channel, then transpose back
            array = change_samplerate_interp(
                array.T, float(sample_rate), float(morph_srate)).T

        segments.append(array)
        total_frames += array.shape[1]
        markers.append(total_frames)

    markers.pop()  # remove last marker: the end of the reel is not a splice
    # Join once at the end instead of re-copying the reel for every file.
    allwaves = np.concatenate(segments, axis=1)

    # write wav file with additional cue markers from labels
    float32_wav_file(outputfile, allwaves, morph_srate, markers=markers)
    print('Saved Morphagene reel with %i splices: %s'
          % (len(markers) + 1, outputfile))
if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
@eznj
Copy link

eznj commented Feb 14, 2020

Good stuff! I feel like you should convert this from gist to repo elsewhere. I have some ideas to propose when I get some time later.

@knandersen
Copy link
Author

Great idea @nicksort, I was actually considering that too!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment