Skip to content

Instantly share code, notes, and snippets.

@drscotthawley
Last active November 4, 2019 18:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drscotthawley/3c812fde932c649cd3cd41ea8a29803c to your computer and use it in GitHub Desktop.
Save drscotthawley/3c812fde932c649cd3cd41ea8a29803c to your computer and use it in GitHub Desktop.
Alternative method for calculating "instantaneous frequency" for use with spectrograms.
#! /usr/bin/env python3
# Test script for Phase 'Unrolling' / Instantaneous frequency
#
# See Jesse Engel's "rainbowgrams" script, https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494
#
# Modifications by Scott H. Hawley, @drscotthawley and Billy Mitchell
# These modified versions seem to be both more accurate (~2000x less reconstruction error)
# and faster (>20%)
#
# note, to really see/hear the difference, change dtypes to np.float16!
import librosa
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import argparse
def rewrap(phase_angle): # opposite of unwrap; handy for testing but I don't actually like to use it
return (phase_angle + np.pi) % (2 * np.pi) - np.pi
def phase2if(phase_angle, use_unwrap=False, use_where=True):
# Calculate 'dphase' Instantaneous Frequency, normalized to [-1,1]
if use_unwrap: # Engel et al, GANSynth 2017 paper & code
phase_angle = np.unwrap(phase_angle) # unwrap leads to CLOP
dphase = phase_angle[:, 1:] - phase_angle[:, :-1]
if not use_unwrap:
# Hawley's method: don't unroll the phase, that leads to CLOP. All we want is the difference
if use_where:
dphase = np.where(dphase > np.pi, dphase-2*np.pi, dphase) # keep it bounded
dphase = np.where(dphase < -np.pi, dphase+2*np.pi, dphase) # keep it bounded
else:
dphase = rewrap(dphase) # this works ok but loses a little bit of precision
dphase = np.concatenate([phase_angle[:, 0:1], dphase], axis=1)
return dphase/np.pi
def if2phase(dphase, use_cumsum=False):
# undoes phase2if, above
dphase = dphase*np.pi # undo normalization by pi
if use_cumsum:
phase_angle = np.cumsum(dphase, axis=1) # cumsum leads to CLOP
else:
# Integrate dphase forward but make it wrap at +/- pi as we go, to avoid CLOP
phase_angle = dphase
for i in range(1, dphase.shape[-1]):
phase_angle[:,i] = (phase_angle[:, i-1] + dphase[:, i])
phase_angle[:,i] = np.where(phase_angle[:,i] >= np.pi, phase_angle[:,i] - 2*np.pi, phase_angle[:,i])
phase_angle[:,i] = np.where(phase_angle[:,i] < -np.pi, phase_angle[:,i] + 2*np.pi, phase_angle[:,i])
return phase_angle
def audio_test(path=None, ax=1, peak=70.0, use_google=True, n_fft=2048,
win_length=512, hop_length=256, debug=False):
print("Loading from file",path)
# find mag and phase, then phase unroll
audio, sr = librosa.load(path)
audio = audio.astype(np.float32)
n = len(audio)
audio_pad = librosa.util.fix_length(audio, n + n_fft // 2)
#C = librosa.stft(audio_pad, n_fft=n_fft)
C = librosa.stft(audio, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True)
mag = np.abs(C)
phase_angle = np.angle(C)
# convert to 'instantaneous frequency', which we call dphase
dphase = phase2if(phase_angle, use_unwrap=use_google)
# In case you want to convert magnitude to power in dB...
#mag_db = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1
# ...in other code, here's where we'd do the autoencoder on mag and dphase
# Undo what we did above: reconstruct phase from dphase
phase_angle_hat = if2phase(dphase, use_cumsum=use_google)
phase_hat = np.exp(1j*phase_angle_hat) # convert from just angle to complex number
# combine new mag and phase to complex spectrogram C_hat, and generate output audio via ISTFT
mag_hat = mag
C_hat = mag_hat * phase_hat # combine into new complex array
audio_out = librosa.istft(C_hat, length=n, win_length=win_length, hop_length=hop_length, center=True)
if debug:
diff = audio - audio_out
print("absmax audio diff = ",np.max(np.abs(diff)))
plt.title("input audio - output audio")
plt.plot(diff)
plt.show()
librosa.output.write_wav('test_out.wav',audio_out,sr)
return audio_out
if __name__ == '__main__':
parser= argparse.ArgumentParser(description='none', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--path', help='location of file', default='./Leadfoot_ScottHawley_clip.wav')
args=parser.parse_args()
for use_google in [True, False]: # Engel et al's version vs. my version
audio_test(path=args.path, use_google=use_google, debug=True)
@drscotthawley
Copy link
Author

Just noting here for future reference:
"One-step robust deep learning phase unwrapping " by Wang et al, April 2019:
https://www.osapublishing.org/DirectPDFAccess/2C694D16-BD6C-C4A2-226D6623E18ACAF2_412322/oe-27-10-15100.pdf?da=1&id=412322&seq=0&mobile=no

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment