Skip to content

Instantly share code, notes, and snippets.

@voodoohop
Forked from keunwoochoi/pseudo_cqt_pytorch.py
Last active July 17, 2021 20:26
Show Gist options
  • Save voodoohop/2089c61218605f758289cada102c1b9e to your computer and use it in GitHub Desktop.
Save voodoohop/2089c61218605f758289cada102c1b9e to your computer and use it in GitHub Desktop.
Compute a pseudo-CQT (a constant-Q transform approximated from an STFT) with TensorFlow.
import librosa
import tensorflow as tf
import numpy as np
# Small constant added under the square root when computing magnitudes,
# so that the gradient of the square root stays finite at zero.
EPS = 1e-4

# librosa's private helper that builds the CQT filter bank in the
# frequency domain; called as
# ``fft_basis, n_fft, _ = cqt_filter_fft(...)``.
# NOTE(review): this is a private librosa symbol -- confirm it still
# exists under this name in the librosa version you have installed.
cqt_filter_fft = librosa.constantq.__cqt_filter_fft
class PseudoCqt:
    """Compute a pseudo-CQT (constant-Q transform via STFT) with TensorFlow.

    Written by Keunwoo Choi and adapted to tensorflow by Thomas Haferlach.
    API (+implementation) follows librosa
    (https://librosa.github.io/librosa/generated/librosa.core.pseudo_cqt.html)

    The input must be batched: ``forward`` transposes the STFT with
    ``perm=[0, 2, 1]``, so ``y`` is expected to have shape
    ``(batch, samples)``.

    Usage:
        src, _ = librosa.load(filename)
        cqt_calculator = PseudoCqt()
        cqt_calculator(src[np.newaxis, :])  # -> (1, n_bins, time)

    Args:
        sr: Sampling rate of the audio, passed to the filter-bank builder.
        hop_length: Hop (frame step) of the STFT, in samples.
        fmin: Lowest CQT frequency in Hz. ``None`` selects C2
            (2 * 32.703195 Hz).
        n_bins: Number of CQT bins.
        bins_per_octave: Number of bins per octave.
        filter_scale: Filter scale factor, passed to the filter-bank builder.
        norm: Normalisation order of the basis filters.
        sparsity: Sparsification level used when building the filter bank.
        window: Window name; only ``"hann"`` is supported.
        scale: Whether to scale the output by ``1 / sqrt(n_fft)``; only
            ``True`` is supported.
        pad_mode: Stored on the instance for API parity with librosa but not
            used by ``forward``, which calls ``tf.signal.stft`` with
            ``pad_end=True``.
    """

    def __init__(self, sr=22050, hop_length=512, fmin=None, n_bins=84,
                 bins_per_octave=12, filter_scale=1,
                 norm=1, sparsity=0.01, window='hann', scale=True,
                 pad_mode='reflect'):
        assert scale, "only scale=True is supported"
        assert window == "hann", "only the 'hann' window is supported"
        if fmin is None:
            fmin = 2 * 32.703195  # note_to_hz('C2') because C1 is too low

        fft_basis, n_fft, _ = cqt_filter_fft(sr, fmin, n_bins, bins_per_octave,
                                             filter_scale, norm, sparsity,
                                             hop_length=hop_length,
                                             window=window)
        # The filter bank is a complex sparse matrix. Take the magnitude
        # FIRST and only then cast to float32: casting a complex array to a
        # real dtype silently drops the imaginary part, which would make the
        # subsequent abs() the absolute value of the real part only.
        fft_basis = np.abs(fft_basis).astype(np.float32).toarray()

        # Leading axis of size 1 lets matmul broadcast over the batch:
        # (1, n_bins, n_freq)
        self.fft_basis = tf.expand_dims(tf.convert_to_tensor(fft_basis), 0)
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.pad_mode = pad_mode
        self.scale = scale
        self.window = tf.signal.hann_window
        self.npdtype = np.float32

    def __call__(self, y):
        """Alias for :meth:`forward`."""
        return self.forward(y)

    def forward(self, y):
        """Return the pseudo-CQT magnitudes of a batch of signals.

        Args:
            y: Batched audio, expected shape ``(batch, samples)``.

        Returns:
            Tensor of shape ``(batch, n_bins, time)``.
        """
        # NOTE(review): librosa's pseudo_cqt windows each frame with a
        # window of length n_fft, whereas this port uses hop_length * 4.
        # Kept as in the original port -- confirm that this is intended.
        spectrum = tf.signal.stft(y,
                                  frame_length=self.hop_length * 4,
                                  frame_step=self.hop_length,
                                  fft_length=self.n_fft,
                                  window_fn=self.window,
                                  pad_end=True)  # complex, (batch, time, n_freq)

        # Magnitude from BOTH the real and imaginary parts. Computed as
        # sqrt(re^2 + im^2 + EPS) rather than tf.abs because, without EPS,
        # backpropagating through the CQT can yield NaN.
        power = (tf.math.square(tf.math.real(spectrum))
                 + tf.math.square(tf.math.imag(spectrum)))
        magnitudes = tf.math.sqrt(power + EPS)
        magnitudes = tf.transpose(magnitudes, perm=[0, 2, 1])  # (batch, n_freq, time)

        # Project onto the pseudo-cqt basis
        C = tf.matmul(self.fft_basis, magnitudes)  # (batch, n_bins, time)
        C /= tf.math.sqrt(float(self.n_fft))  # because `scale` is always True
        return C
@voodoohop
Copy link
Author

voodoohop commented May 5, 2020

Usage:

The audio must be batched, i.e. a tensor of shape (batch, samples).

# Six octaves at 12 bins per octave.
bins_per_octave = 12
pseudo_cqt = PseudoCqt(
    sr=sample_rate,
    hop_length=128,
    n_bins=bins_per_octave * 6,
    bins_per_octave=bins_per_octave,
)
cqt = pseudo_cqt(audio)
# Plot the first item of the batch on a dB scale.
librosa.display.specshow(
    librosa.amplitude_to_db(cqt[0].numpy()),
    sr=sample_rate,
    cmap='gray_r',
)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment