{{ message }}

Instantly share code, notes, and snippets.

# drscotthawley/inst_freq.py

Last active Nov 4, 2019
Alternative method for calculating "instantaneous frequency" for use with spectrograms.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 #! /usr/bin/env python3 # Test script for Phase 'Unrolling' / Instantaneous frequency # # See Jesse Engel's "rainbowgrams" script, https://gist.github.com/jesseengel/e223622e255bd5b8c9130407397a0494 # # Modifications by Scott H. Hawley, @drscotthawley and Billy Mitchell # These modified versions seem to be both more accurate (~2000x less reconstruction error) # and faster (>20%) # # note, to really see/hear the difference, change dtypes to np.float16! import librosa import matplotlib import matplotlib.pyplot as plt import numpy as np import argparse def rewrap(phase_angle): # opposite of unwrap; handy for testing but I don't actually like to use it return (phase_angle + np.pi) % (2 * np.pi) - np.pi def phase2if(phase_angle, use_unwrap=False, use_where=True): # Calculate 'dphase' Instantaneous Frequency, normalized to [-1,1] if use_unwrap: # Engel et al, GANSynth 2017 paper & code phase_angle = np.unwrap(phase_angle) # unwrap leads to CLOP dphase = phase_angle[:, 1:] - phase_angle[:, :-1] if not use_unwrap: # Hawley's method: don't unroll the phase, that leads to CLOP. All we want is the difference if use_where: dphase = np.where(dphase > np.pi, dphase-2*np.pi, dphase) # keep it bounded dphase = np.where(dphase < -np.pi, dphase+2*np.pi, dphase) # keep it bounded else: dphase = rewrap(dphase) # this works ok but loses a little bit of precision dphase = np.concatenate([phase_angle[:, 0:1], dphase], axis=1) return dphase/np.pi def if2phase(dphase, use_cumsum=False): # undoes phase2if, above dphase = dphase*np.pi # undo normalization by pi if use_cumsum: phase_angle = np.cumsum(dphase, axis=1) # cumsum leads to CLOP else: # Integrate dphase forward but make it wrap at +/- pi as we go, to avoid CLOP phase_angle = dphase for i in range(1, dphase.shape[-1]): phase_angle[:,i] = (phase_angle[:, i-1] + dphase[:, i]) phase_angle[:,i] = np.where(phase_angle[:,i] >= np.pi, phase_angle[:,i] - 2*np.pi, phase_angle[:,i]) phase_angle[:,i] = np.where(phase_angle[:,i] < -np.pi, phase_angle[:,i] + 2*np.pi, phase_angle[:,i]) return phase_angle def audio_test(path=None, ax=1, peak=70.0, use_google=True, n_fft=2048, win_length=512, hop_length=256, debug=False): print("Loading from file",path) # find mag and phase, then phase unroll audio, sr = librosa.load(path) audio = audio.astype(np.float32) n = len(audio) audio_pad = librosa.util.fix_length(audio, n + n_fft // 2) #C = librosa.stft(audio_pad, n_fft=n_fft) C = librosa.stft(audio, n_fft=n_fft, win_length=win_length, hop_length=hop_length, center=True) mag = np.abs(C) phase_angle = np.angle(C) # convert to 'instantaneous frequency', which we call dphase dphase = phase2if(phase_angle, use_unwrap=use_google) # In case you want to convert magnitude to power in dB... #mag_db = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1 # ...in other code, here's where we'd do the autoencoder on mag and dphase # Undo what we did above: reconstruct phase from dphase phase_angle_hat = if2phase(dphase, use_cumsum=use_google) phase_hat = np.exp(1j*phase_angle_hat) # convert from just angle to complex number # combine new mag and phase to complex spectrogram C_hat, and generate output audio via ISTFT mag_hat = mag C_hat = mag_hat * phase_hat # combine into new complex array audio_out = librosa.istft(C_hat, length=n, win_length=win_length, hop_length=hop_length, center=True) if debug: diff = audio - audio_out print("absmax audio diff = ",np.max(np.abs(diff))) plt.title("input audio - output audio") plt.plot(diff) plt.show() librosa.output.write_wav('test_out.wav',audio_out,sr) return audio_out if __name__ == '__main__': parser= argparse.ArgumentParser(description='none', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--path', help='location of file', default='./Leadfoot_ScottHawley_clip.wav') args=parser.parse_args() for use_google in [True, False]: # Engel et al's version vs. my version audio_test(path=args.path, use_google=use_google, debug=True)

### drscotthawley commented Nov 4, 2019

Just noting here for future reference:
"One-step robust deep learning phase unwrapping " by Wang et al, April 2019:
https://www.osapublishing.org/DirectPDFAccess/2C694D16-BD6C-C4A2-226D6623E18ACAF2_412322/oe-27-10-15100.pdf?da=1&id=412322&seq=0&mobile=no

to join this conversation on GitHub. Already have an account? Sign in to comment