drscotthawley/inst_freq.py

## inst_freq.py
#! /usr/bin/env python3
# Test script for Phase 'Unrolling' / Instantaneous frequency
#
# See Jesse Engel's "rainbowgrams" script, https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494
#
# Modifications by Scott H. Hawley, @drscotthawley and Billy Mitchell
# These modified versions seem to be both more accurate (~2000x less reconstruction error)
# and faster (>20%)
#
# note, to really see/hear the difference, change dtypes to np.float16!

import librosa
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import argparse


def rewrap(phase_angle):  # opposite of unwrap; handy for testing but I don't actually like to use it
    return (phase_angle + np.pi) % (2 * np.pi) - np.pi


def phase2if(phase_angle, use_unwrap=False, use_where=True):
    # Calculate 'dphase' Instantaneous Frequency, normalized to [-1,1]
    if use_unwrap:  # Engel et al, GANSynth 2017 paper & code
        phase_angle = np.unwrap(phase_angle) # unwrap leads to CLOP

    dphase = phase_angle[:, 1:] - phase_angle[:, :-1]

    if not use_unwrap:
        # Hawley's method: don't unroll the phase, that leads to CLOP.  All we want is the difference
        if use_where:
            dphase = np.where(dphase > np.pi, dphase-2*np.pi, dphase)  # keep it bounded
            dphase = np.where(dphase < -np.pi, dphase+2*np.pi, dphase)  # keep it bounded
        else:
            dphase = rewrap(dphase)  # this works ok but loses a little bit of precision

    dphase = np.concatenate([phase_angle[:, 0:1], dphase], axis=1)
    return dphase/np.pi


def if2phase(dphase, use_cumsum=False):
    # undoes phase2if, above
    dphase = dphase*np.pi    # undo normalization by pi
    if use_cumsum:
        phase_angle = np.cumsum(dphase, axis=1)  # cumsum leads to CLOP
    else:
        # Integrate dphase forward but make it wrap at +/- pi as we go, to avoid CLOP
        phase_angle = dphase
        for i in range(1, dphase.shape[-1]):
            phase_angle[:,i] = (phase_angle[:, i-1] + dphase[:, i])
            phase_angle[:,i] = np.where(phase_angle[:,i] >= np.pi, phase_angle[:,i] - 2*np.pi, phase_angle[:,i])
            phase_angle[:,i] = np.where(phase_angle[:,i] < -np.pi, phase_angle[:,i] + 2*np.pi, phase_angle[:,i])
    return phase_angle


def audio_test(path=None, ax=1, peak=70.0, use_google=True, n_fft=2048,
    win_length=512, hop_length=256, debug=False):
    print("Loading from file",path)
    # find mag and phase, then phase unroll
    audio, sr = librosa.load(path)
    audio = audio.astype(np.float32)

    n = len(audio)
    audio_pad = librosa.util.fix_length(audio, n + n_fft // 2)
    #C = librosa.stft(audio_pad, n_fft=n_fft)
    C = librosa.stft(audio, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True)
    mag = np.abs(C)
    phase_angle = np.angle(C)

    # convert to 'instantaneous frequency', which we call dphase
    dphase = phase2if(phase_angle, use_unwrap=use_google)

    # In case you want to convert magnitude to power in dB...
    #mag_db = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1


    # ...in other code, here's where we'd do the autoencoder on mag and dphase


    # Undo what we did above:  reconstruct phase from dphase
    phase_angle_hat = if2phase(dphase, use_cumsum=use_google)

    phase_hat = np.exp(1j*phase_angle_hat)  # convert from just angle to complex number

    # combine new mag and phase to complex spectrogram C_hat, and generate output audio via ISTFT
    mag_hat = mag
    C_hat = mag_hat * phase_hat  # combine into new complex array
    audio_out = librosa.istft(C_hat, length=n, win_length=win_length, hop_length=hop_length, center=True)

    if debug:
        diff = audio - audio_out
        print("absmax audio diff = ",np.max(np.abs(diff)))
        plt.title("input audio - output audio")
        plt.plot(diff)
        plt.show()
        librosa.output.write_wav('test_out.wav',audio_out,sr)
    return audio_out


if __name__ == '__main__':
  parser= argparse.ArgumentParser(description='none', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('--path', help='location of file', default='./Leadfoot_ScottHawley_clip.wav')
  args=parser.parse_args()


  for use_google in [True, False]:  # Engel et al's version vs. my version
      audio_test(path=args.path, use_google=use_google, debug=True)
	#! /usr/bin/env python3
	# Test script for Phase 'Unrolling' / Instantaneous frequency
	#
	# See Jesse Engel's "rainbowgrams" script, https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494
	#
	# Modifications by Scott H. Hawley, @drscotthawley and Billy Mitchell
	# These modified versions seem to be both more accurate (~2000x less reconstruction error)
	# and faster (>20%)
	#
	# note, to really see/hear the difference, change dtypes to np.float16!

	import librosa
	import matplotlib
	import matplotlib.pyplot as plt
	import numpy as np
	import argparse


	def rewrap(phase_angle): # opposite of unwrap; handy for testing but I don't actually like to use it
	return (phase_angle + np.pi) % (2 * np.pi) - np.pi


	def phase2if(phase_angle, use_unwrap=False, use_where=True):
	# Calculate 'dphase' Instantaneous Frequency, normalized to [-1,1]
	if use_unwrap: # Engel et al, GANSynth 2017 paper & code
	phase_angle = np.unwrap(phase_angle) # unwrap leads to CLOP

	dphase = phase_angle[:, 1:] - phase_angle[:, :-1]

	if not use_unwrap:
	# Hawley's method: don't unroll the phase, that leads to CLOP. All we want is the difference
	if use_where:
	dphase = np.where(dphase > np.pi, dphase-2*np.pi, dphase) # keep it bounded
	dphase = np.where(dphase < -np.pi, dphase+2*np.pi, dphase) # keep it bounded
	else:
	dphase = rewrap(dphase) # this works ok but loses a little bit of precision

	dphase = np.concatenate([phase_angle[:, 0:1], dphase], axis=1)
	return dphase/np.pi


	def if2phase(dphase, use_cumsum=False):
	# undoes phase2if, above
	dphase = dphase*np.pi # undo normalization by pi
	if use_cumsum:
	phase_angle = np.cumsum(dphase, axis=1) # cumsum leads to CLOP
	else:
	# Integrate dphase forward but make it wrap at +/- pi as we go, to avoid CLOP
	phase_angle = dphase
	for i in range(1, dphase.shape[-1]):
	phase_angle[:,i] = (phase_angle[:, i-1] + dphase[:, i])
	phase_angle[:,i] = np.where(phase_angle[:,i] >= np.pi, phase_angle[:,i] - 2*np.pi, phase_angle[:,i])
	phase_angle[:,i] = np.where(phase_angle[:,i] < -np.pi, phase_angle[:,i] + 2*np.pi, phase_angle[:,i])
	return phase_angle


	def audio_test(path=None, ax=1, peak=70.0, use_google=True, n_fft=2048,
	win_length=512, hop_length=256, debug=False):
	print("Loading from file",path)
	# find mag and phase, then phase unroll
	audio, sr = librosa.load(path)
	audio = audio.astype(np.float32)

	n = len(audio)
	audio_pad = librosa.util.fix_length(audio, n + n_fft // 2)
	#C = librosa.stft(audio_pad, n_fft=n_fft)
	C = librosa.stft(audio, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True)
	mag = np.abs(C)
	phase_angle = np.angle(C)

	# convert to 'instantaneous frequency', which we call dphase
	dphase = phase2if(phase_angle, use_unwrap=use_google)

	# In case you want to convert magnitude to power in dB...
	#mag_db = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1


	# ...in other code, here's where we'd do the autoencoder on mag and dphase


	# Undo what we did above: reconstruct phase from dphase
	phase_angle_hat = if2phase(dphase, use_cumsum=use_google)

	phase_hat = np.exp(1j*phase_angle_hat) # convert from just angle to complex number

	# combine new mag and phase to complex spectrogram C_hat, and generate output audio via ISTFT
	mag_hat = mag
	C_hat = mag_hat * phase_hat # combine into new complex array
	audio_out = librosa.istft(C_hat, length=n, win_length=win_length, hop_length=hop_length, center=True)

	if debug:
	diff = audio - audio_out
	print("absmax audio diff = ",np.max(np.abs(diff)))
	plt.title("input audio - output audio")
	plt.plot(diff)
	plt.show()
	librosa.output.write_wav('test_out.wav',audio_out,sr)
	return audio_out


	if __name__ == '__main__':
	parser= argparse.ArgumentParser(description='none', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('--path', help='location of file', default='./Leadfoot_ScottHawley_clip.wav')
	args=parser.parse_args()


	for use_google in [True, False]: # Engel et al's version vs. my version
	audio_test(path=args.path, use_google=use_google, debug=True)