ferrihydrite/morphagene_audacity_old.py

## morphagene_audacity_old.py
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Used to convert Audacity labels in .txt form on  .WAV files into
    single 32-bit float .WAV with CUE markers within the file, directly
    compatible with the Make Noise Morphagene.
Does not require input file to be 48000Hz, only that the Audacity label matches
    the .WAV file that generated it, and that the input .WAV is stereo.

See the Morphagene manual for naming conventions of output files:
    http://www.makenoisemusic.com/content/manuals/morphagene-manual.pdf
"""

# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python
# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script
# marker code from Joseph Basquin [https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476]
import struct, os
import numpy as np
from scipy import interpolate

def float32_wav_file(file_name, sample_array, sample_rate,
                     markers=None, verbose=False):
    (M,N)=sample_array.shape
    #print "len sample_array=(%d,%d)" % (M,N)
    byte_count = M * N * 4 # (len(sample_array)) * 4  # 32-bit floats
    wav_file = ""
    # write the header
    wav_file += struct.pack('<ccccIccccccccIHHIIHH',
        'R', 'I', 'F', 'F',
        byte_count + 0x2c - 8,  # header size
        'W', 'A', 'V', 'E', 'f', 'm', 't', ' ',
        0x10,  # size of 'fmt ' header
        3,  # format 3 = floating-point PCM
        M,  # channels
        sample_rate,  # samples / second
        sample_rate * 4,  # bytes / second
        4,  # block alignment
        32)  # bits / sample
    wav_file += struct.pack('<ccccI',
        'd', 'a', 't', 'a', byte_count)
    if verbose:
        print "packing..."
    for j in range(0,N):
        for k in range(0,M):
            wav_file += struct.pack("<f", sample_array[k,j])
    if verbose:
        print "saving audio..."
    fid=open(file_name,'wb')
    for value in wav_file:
        fid.write(value)
    if markers:    # != None and != []
        if verbose:
            print "saving cue markers..."
        if isinstance(markers[0], dict):       # then we have [{'position': 100, 'label': 'marker1'}, ...]
            labels = [m['label'] for m in markers]
            markers = [m['position'] for m in markers]
        else:
            labels = ['' for m in markers]
        fid.write(b'cue ')
        size = 4 + len(markers) * 24
        fid.write(struct.pack('<ii', size, len(markers)))
        for i, c in enumerate(markers):
            s = struct.pack('<iiiiii', i + 1, c, 1635017060, 0, 0, c)           # 1635017060 is struct.unpack('<i',b'data')
            fid.write(s)
        lbls = ''
        for i, lbl in enumerate(labels):
            lbls += b'labl'
            label = lbl + ('\x00' if len(lbl) % 2 == 1 else '\x00\x00')
            size = len(lbl) + 1 + 4          # because \x00
            lbls += struct.pack('<ii', size, i + 1)
            lbls += label
        fid.write(b'LIST')
        size = len(lbls) + 4
        fid.write(struct.pack('<i', size))
        fid.write(b'adtl')                                                      # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
        fid.write(lbls)
    fid.close()
    return wav_file

def wav_file_read(filename,verbose=False):
    # read file and close
    fi=open(filename,'rb')
    data=fi.read()
    fi.close()
    # take raw data and read subsections for important format data
    A,B,C,D=struct.unpack('4c', data[0:4])      # 'RIFF'
    ChunkSize=struct.unpack('<l', data[4:8])[0]   #4+(8+SubChunk1Size)+8+SubChunk2Size)
    A,B,C,D=struct.unpack('4c', data[8:12])     # 'WAVE'
    A,B,C,D=struct.unpack('4c', data[12:16])    # 'fmt '
    Subchunk1Size=struct.unpack('<l', data[16:20])[0] # LITTLE ENDIAN, long, 16
    AudioFormat=struct.unpack('<h', data[20:22])[0] # LITTLE ENDIAN, short, 1
    NumChannels=struct.unpack('<h', data[22:24])[0] # LITTLE ENDIAN, short, Mono = 1, Stereo = 2
    SampleRate =struct.unpack('<l', data[24:28])[0] # LITTLE ENDIAN, long,  sample rate in samples per second
    ByteRate=struct.unpack('<l', data[28:32])[0] # self.SampleRate * self.NumChannels * self.BitsPerSample/8)) # (ByteRate) LITTLE ENDIAN, long
    BlockAlign=struct.unpack('<h', data[32:34])[0] # self.NumChannels * self.BitsPerSample/8))  # (BlockAlign) LITTLE ENDIAN, short
    BitsPerSample=struct.unpack('<h', data[34:36])[0] # LITTLE ENDIAN, short
    A,B,C,D=struct.unpack('4c', data[36:40])    # BIG ENDIAN, char*4
    SubChunk2Size=struct.unpack('<l', data[40:44])[0] # LITTLE ENDIAN, long
    waveData=data[44:]
    (M,N)=(len(waveData),len(waveData[0]))
    if verbose:
        print("ChunkSize     =%d\nSubchunk1Size =%d\nAudioFormat   =%d\nNumChannels   =%d\nSampleRate    =%d\nByteRate      =%d\nBlockAlign    =%d\nBitsPerSample =%d\nA:%c,  B:%c,  C:%c,  D:%c\nSubChunk2Size =%d" %
            (ChunkSize     ,
            Subchunk1Size,
            AudioFormat   ,
            NumChannels   ,
            SampleRate    ,
            ByteRate      ,
            BlockAlign    ,
            BitsPerSample ,
            A,  B,  C,  D ,
            SubChunk2Size ))
    # convert audio data to float based on bitdepth
    if BitsPerSample==8:
        if verbose:
            print "Unpacking 8 bits on len(waveData)=%d" % len(waveData)
        d=np.fromstring(waveData,np.uint8)
        floatdata=d.astype(np.float64)/np.float(127)
    elif BitsPerSample==16:
        if verbose:
            print "Unpacking 16 bits on len(waveData)=%d" % len(waveData)
        d=np.zeros(SubChunk2Size/2, dtype=np.int16)
        j=0
        for k in range(0, SubChunk2Size, 2):
            d[j]=struct.unpack('<h',waveData[k:k+2])[0]
            j=j+1
        floatdata=d.astype(np.float64)/np.float(32767)
    elif BitsPerSample==24:
        if verbose:
            print "Unpacking 24 bits on len(waveData)=%d" % len(waveData)
        d=np.zeros(SubChunk2Size/3,  dtype=np.int32)
        j=0
        for k in range(0, SubChunk2Size, 3):
            d[j]=struct.unpack('<l',struct.pack('c',waveData[k])+waveData[k:k+3])[0]
            j=j+1
        floatdata=d.astype(np.float64)/np.float(2147483647)
    else: # anything else will be considered 32 bits
        if verbose:
            print "Unpacking 32 bits on len(waveData)=%d" % len(waveData)
        d=np.fromstring(waveData,np.int32)
        floatdata=d.astype(np.float64)/np.float(2147483647)
    v=floatdata[0::NumChannels]
    for i in range(1,NumChannels):
        v=np.vstack((v,floatdata[i::NumChannels]))
    #return (np.vstack((floatdata[0::2],floatdata[1::2])), SampleRate, NumChannels, BitsPerSample)
    return (v, SampleRate, NumChannels, BitsPerSample)

def load_audacity_labels(label_file):
    '''
    Load Audacity labels, ignoring the additional frequency range info lines,
        if labels were exported from a spectrogram.
    '''
    fi = open(label_file, 'r')
    labs = [line.strip().split()[0] for line in fi if not line.startswith('\\')]
    fi.close()
    return np.array(labs).astype('float')

def change_samplerate_interp(old_audio,old_rate,new_rate):
    '''
    Change sample rate to new sample rate by simple interpolation.
    If old_rate > new_rate, there may be aliasing / data loss.
    Input should be in column format, as the interpolation will be completed
        on each channel this way.
    Modified from:
    https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
    '''
    if old_rate != new_rate:
        # duration of audio
        duration = old_audio.shape[0] / old_rate

        # length of old and new audio
        time_old  = np.linspace(0, duration, old_audio.shape[0])
        time_new  = np.linspace(0, duration, int(old_audio.shape[0] * new_rate / old_rate))

        # fit old_audio into new_audio length by interpolation
        interpolator = interpolate.interp1d(time_old, old_audio.T)
        new_audio = interpolator(time_new).T
        return new_audio
    else:
        print 'Conversion not needed, old and new rates match'
        return old_audio # conversion not needed

if __name__ == "__main__":
    morph_srate = 48000 # required samplerate for Morphagene
    write_label_file = True
    ###########################################################################
    '''
    Write single file, edited in Audacity with labels, to Morphagene 32bit
        WAV file
    '''
    ###########################################################################
    label_file = 'boring.txt'
    audio_file = 'boring.wav'

    # read labels from stereo Audacity label file, ignore text, and use one channel
    audac_labs = load_audacity_labels(label_file)

    # read pertinent info from audio file
    (array,sample_rate,num_channels,bits_per_sample)=wav_file_read(audio_file)

    # check if input wav has a different rate than desired Morphagene rate,
    #   and correct by interpolation
    if sample_rate != morph_srate:
        # perform interpolation on each channel, then transpose back
        array = change_samplerate_interp(array.T,float(sample_rate),float(morph_srate)).T
        # convert labels in seconds to labels in frames, adjusting for change
        #   in rate
        sc = float(morph_srate) / float(sample_rate)
        frame_labs = (audac_labs * sample_rate * sc).astype(np.int)
    else:
        frame_labs = (audac_labs * sample_rate).astype(np.int)
    frame_dict = [{'position': l, 'label': 'marker%i'%(i+1)} for i,l in enumerate(frame_labs)]

    # write wav file with additional cue markers from labels
    wavefile=float32_wav_file("mg_boring.wav",array,morph_srate,markers=frame_dict)
    if write_label_file:
        labs = np.tile(frame_labs,(2,1)).T/float(morph_srate)
        np.savetxt('boring_lab.txt',labs,fmt='%03.6f',delimiter='\t')
	#!/usr/bin/env python2
	# -- coding: utf-8 --
	"""
	Used to convert Audacity labels in .txt form on .WAV files into
	single 32-bit float .WAV with CUE markers within the file, directly
	compatible with the Make Noise Morphagene.
	Does not require input file to be 48000Hz, only that the Audacity label matches
	the .WAV file that generated it, and that the input .WAV is stereo.

	See the Morphagene manual for naming conventions of output files:
	http://www.makenoisemusic.com/content/manuals/morphagene-manual.pdf
	"""

	# see http://stackoverflow.com/questions/15576798/create-32bit-float-wav-file-in-python
	# see... http://blog.theroyweb.com/extracting-wav-file-header-information-using-a-python-script
	# marker code from Joseph Basquin [https://gist.github.com/josephernest/3f22c5ed5dabf1815f16efa8fa53d476]
	import struct, os
	import numpy as np
	from scipy import interpolate

	def float32_wav_file(file_name, sample_array, sample_rate,
	markers=None, verbose=False):
	(M,N)=sample_array.shape
	#print "len sample_array=(%d,%d)" % (M,N)
	byte_count = M * N * 4 # (len(sample_array)) * 4 # 32-bit floats
	wav_file = ""
	# write the header
	wav_file += struct.pack('<ccccIccccccccIHHIIHH',
	'R', 'I', 'F', 'F',
	byte_count + 0x2c - 8, # header size
	'W', 'A', 'V', 'E', 'f', 'm', 't', ' ',
	0x10, # size of 'fmt ' header
	3, # format 3 = floating-point PCM
	M, # channels
	sample_rate, # samples / second
	sample_rate * 4, # bytes / second
	4, # block alignment
	32) # bits / sample
	wav_file += struct.pack('<ccccI',
	'd', 'a', 't', 'a', byte_count)
	if verbose:
	print "packing..."
	for j in range(0,N):
	for k in range(0,M):
	wav_file += struct.pack("<f", sample_array[k,j])
	if verbose:
	print "saving audio..."
	fid=open(file_name,'wb')
	for value in wav_file:
	fid.write(value)
	if markers: # != None and != []
	if verbose:
	print "saving cue markers..."
	if isinstance(markers[0], dict): # then we have [{'position': 100, 'label': 'marker1'}, ...]
	labels = [m['label'] for m in markers]
	markers = [m['position'] for m in markers]
	else:
	labels = ['' for m in markers]
	fid.write(b'cue ')
	size = 4 + len(markers) * 24
	fid.write(struct.pack('<ii', size, len(markers)))
	for i, c in enumerate(markers):
	s = struct.pack('<iiiiii', i + 1, c, 1635017060, 0, 0, c) # 1635017060 is struct.unpack('<i',b'data')
	fid.write(s)
	lbls = ''
	for i, lbl in enumerate(labels):
	lbls += b'labl'
	label = lbl + ('\x00' if len(lbl) % 2 == 1 else '\x00\x00')
	size = len(lbl) + 1 + 4 # because \x00
	lbls += struct.pack('<ii', size, i + 1)
	lbls += label
	fid.write(b'LIST')
	size = len(lbls) + 4
	fid.write(struct.pack('<i', size))
	fid.write(b'adtl') # https://web.archive.org/web/20141226210234/http://www.sonicspot.com/guide/wavefiles.html#list
	fid.write(lbls)
	fid.close()
	return wav_file

	def wav_file_read(filename,verbose=False):
	# read file and close
	fi=open(filename,'rb')
	data=fi.read()
	fi.close()
	# take raw data and read subsections for important format data
	A,B,C,D=struct.unpack('4c', data[0:4]) # 'RIFF'
	ChunkSize=struct.unpack('<l', data[4:8])[0] #4+(8+SubChunk1Size)+8+SubChunk2Size)
	A,B,C,D=struct.unpack('4c', data[8:12]) # 'WAVE'
	A,B,C,D=struct.unpack('4c', data[12:16]) # 'fmt '
	Subchunk1Size=struct.unpack('<l', data[16:20])[0] # LITTLE ENDIAN, long, 16
	AudioFormat=struct.unpack('<h', data[20:22])[0] # LITTLE ENDIAN, short, 1
	NumChannels=struct.unpack('<h', data[22:24])[0] # LITTLE ENDIAN, short, Mono = 1, Stereo = 2
	SampleRate =struct.unpack('<l', data[24:28])[0] # LITTLE ENDIAN, long, sample rate in samples per second
	ByteRate=struct.unpack('<l', data[28:32])[0] # self.SampleRate * self.NumChannels * self.BitsPerSample/8)) # (ByteRate) LITTLE ENDIAN, long
	BlockAlign=struct.unpack('<h', data[32:34])[0] # self.NumChannels * self.BitsPerSample/8)) # (BlockAlign) LITTLE ENDIAN, short
	BitsPerSample=struct.unpack('<h', data[34:36])[0] # LITTLE ENDIAN, short
	A,B,C,D=struct.unpack('4c', data[36:40]) # BIG ENDIAN, char*4
	SubChunk2Size=struct.unpack('<l', data[40:44])[0] # LITTLE ENDIAN, long
	waveData=data[44:]
	(M,N)=(len(waveData),len(waveData[0]))
	if verbose:
	print("ChunkSize =%d\nSubchunk1Size =%d\nAudioFormat =%d\nNumChannels =%d\nSampleRate =%d\nByteRate =%d\nBlockAlign =%d\nBitsPerSample =%d\nA:%c, B:%c, C:%c, D:%c\nSubChunk2Size =%d" %
	(ChunkSize ,
	Subchunk1Size,
	AudioFormat ,
	NumChannels ,
	SampleRate ,
	ByteRate ,
	BlockAlign ,
	BitsPerSample ,
	A, B, C, D ,
	SubChunk2Size ))
	# convert audio data to float based on bitdepth
	if BitsPerSample==8:
	if verbose:
	print "Unpacking 8 bits on len(waveData)=%d" % len(waveData)
	d=np.fromstring(waveData,np.uint8)
	floatdata=d.astype(np.float64)/np.float(127)
	elif BitsPerSample==16:
	if verbose:
	print "Unpacking 16 bits on len(waveData)=%d" % len(waveData)
	d=np.zeros(SubChunk2Size/2, dtype=np.int16)
	j=0
	for k in range(0, SubChunk2Size, 2):
	d[j]=struct.unpack('<h',waveData[k:k+2])[0]
	j=j+1
	floatdata=d.astype(np.float64)/np.float(32767)
	elif BitsPerSample==24:
	if verbose:
	print "Unpacking 24 bits on len(waveData)=%d" % len(waveData)
	d=np.zeros(SubChunk2Size/3, dtype=np.int32)
	j=0
	for k in range(0, SubChunk2Size, 3):
	d[j]=struct.unpack('<l',struct.pack('c',waveData[k])+waveData[k:k+3])[0]
	j=j+1
	floatdata=d.astype(np.float64)/np.float(2147483647)
	else: # anything else will be considered 32 bits
	if verbose:
	print "Unpacking 32 bits on len(waveData)=%d" % len(waveData)
	d=np.fromstring(waveData,np.int32)
	floatdata=d.astype(np.float64)/np.float(2147483647)
	v=floatdata[0::NumChannels]
	for i in range(1,NumChannels):
	v=np.vstack((v,floatdata[i::NumChannels]))
	#return (np.vstack((floatdata[0::2],floatdata[1::2])), SampleRate, NumChannels, BitsPerSample)
	return (v, SampleRate, NumChannels, BitsPerSample)

	def load_audacity_labels(label_file):
	'''
	Load Audacity labels, ignoring the additional frequency range info lines,
	if labels were exported from a spectrogram.
	'''
	fi = open(label_file, 'r')
	labs = [line.strip().split()[0] for line in fi if not line.startswith('\\')]
	fi.close()
	return np.array(labs).astype('float')

	def change_samplerate_interp(old_audio,old_rate,new_rate):
	'''
	Change sample rate to new sample rate by simple interpolation.
	If old_rate > new_rate, there may be aliasing / data loss.
	Input should be in column format, as the interpolation will be completed
	on each channel this way.
	Modified from:
	https://stackoverflow.com/questions/33682490/how-to-read-a-wav-file-using-scipy-at-a-different-sampling-rate
	'''
	if old_rate != new_rate:
	# duration of audio
	duration = old_audio.shape[0] / old_rate

	# length of old and new audio
	time_old = np.linspace(0, duration, old_audio.shape[0])
	time_new = np.linspace(0, duration, int(old_audio.shape[0] * new_rate / old_rate))

	# fit old_audio into new_audio length by interpolation
	interpolator = interpolate.interp1d(time_old, old_audio.T)
	new_audio = interpolator(time_new).T
	return new_audio
	else:
	print 'Conversion not needed, old and new rates match'
	return old_audio # conversion not needed

	if __name__ == "__main__":
	morph_srate = 48000 # required samplerate for Morphagene
	write_label_file = True
	###########################################################################
	'''
	Write single file, edited in Audacity with labels, to Morphagene 32bit
	WAV file
	'''
	###########################################################################
	label_file = 'boring.txt'
	audio_file = 'boring.wav'

	# read labels from stereo Audacity label file, ignore text, and use one channel
	audac_labs = load_audacity_labels(label_file)

	# read pertinent info from audio file
	(array,sample_rate,num_channels,bits_per_sample)=wav_file_read(audio_file)

	# check if input wav has a different rate than desired Morphagene rate,
	# and correct by interpolation
	if sample_rate != morph_srate:
	# perform interpolation on each channel, then transpose back
	array = change_samplerate_interp(array.T,float(sample_rate),float(morph_srate)).T
	# convert labels in seconds to labels in frames, adjusting for change
	# in rate
	sc = float(morph_srate) / float(sample_rate)
	frame_labs = (audac_labs * sample_rate * sc).astype(np.int)
	else:
	frame_labs = (audac_labs * sample_rate).astype(np.int)
	frame_dict = [{'position': l, 'label': 'marker%i'%(i+1)} for i,l in enumerate(frame_labs)]

	# write wav file with additional cue markers from labels
	wavefile=float32_wav_file("mg_boring.wav",array,morph_srate,markers=frame_dict)
	if write_label_file:
	labs = np.tile(frame_labs,(2,1)).T/float(morph_srate)
	np.savetxt('boring_lab.txt',labs,fmt='%03.6f',delimiter='\t')