palonso/nsgcq_encoded.py

## nsgcq_encoded.py
import numpy as np
import matplotlib.pyplot as plt

import essentia.pytools.spectral as sp
from essentia.standard import MonoLoader, MonoWriter
from essentia import lin2db

PARAMS = {
    'frameSize': 2 ** 14,
    'sampleRate': 44100,
    'minFrequency': 40,
    'maxFrequency': 15000,
    'binsPerOctave': 24,
    'normalize': 'sine',
    'transitionSize': 256
    }


def SNR(r, t, skip=8192):
    """
    r    : reference
    t    : test
    skip : number of samples to skip from the SNR computation
    """
    difference = ((r[skip: -skip] - t[skip: -skip]) ** 2).sum()
    return lin2db((r[skip: -skip] ** 2).sum() / difference)


def cqt(audio):
    """Forward transform. Takes audio at the sample rate specified in PARAMS.
    Returns a 4D vector (n_frames, channels, frame_size, n_bins)
     - n_frames depends on the audio length.
     - there are 4 channels: real part, imag part, real part sign, imag part sign.
     - frame_size: duration of each frame in samples. e.g., 2 ** 14 / 44100 = 0.38s
     - n_bins: number of frequency bins. Can be controled with binsPerOctave, min
         and max frequencies.
    """

    cq_frames, dc_frames, nb_frames = sp.nsgcqgram(x, **PARAMS)
    n_frames = len(cq_frames)

    stack = np.empty([n_frames, 4, cq_frames[0].shape[0], cq_frames[0].shape[1]])
    for f in range(n_frames):
        stack[f, 0] = np.log10(10000. * abs(cq_frames[f].real) + 1.)  # ch.0
        stack[f, 1] = np.log10(10000. * abs(cq_frames[f].imag) + 1.)  # ch.1
        stack[f, 2] = np.array(cq_frames[f].real < 0, dtype=int)  # ch.2
        stack[f, 3] = np.array(cq_frames[f].imag < 0, dtype=int)  # ch.3

    return stack, dc_frames, nb_frames


def icqt(cq_frames, dc_frames=None, nb_frames=None):
    """ Backward transform. Takes cqt frames and returns audio.
        Optionally DC and Nyquist bands that are outside the analysis range
        can be added.
    """
    stack = []
    nbins = cq_frames.shape[1] // 2

    stack = []
    for f in range(cq_frames.shape[0]):
        iscqt = cq_frames[f]

        iscqt_real = (10 ** iscqt[0] - 1.) / 10000.
        iscqt_real[np.where(iscqt[2])] *= -1

        iscqt_imag = (10 ** iscqt[1] - 1.) / 10000.
        iscqt_imag[np.where(iscqt[3])] *= -1

        stack.append(np.array(iscqt_real + 1j * iscqt_imag, dtype='complex64'))

    if not dc_frames:
        dc_frames = [np.zeros(30).astype('complex64')] * cq_frames.shape[0]

    if not nb_frames:
        nb_frames = [np.zeros(5308).astype('complex64')] * cq_frames.shape[0]

    y_frames = sp.nsgicqgram(stack, dc_frames, nb_frames, **PARAMS)

    return y_frames


if __name__ == "__main__":
    x = MonoLoader(filename='/home/pablo/vignesh.wav')()

    cq_frames, dc_frames, nb_frames = cqt(x)

    y_frames = icqt(cq_frames, dc_frames, nb_frames)

    cq_snr = SNR(x, y_frames[:x.size])
    print('Reconstruction w out of range data SNR: {:.3f} dB'.format(cq_snr))

    y_frames = icqt(cq_frames)
    cq_snr = SNR(x, y_frames[:x.size])
    print('Reconstruction w/o out of range data SNR: {:.3f} dB'.format(cq_snr))


    plt.matshow(cq_frames[5, 1]), plt.colorbar(), plt.show()
    plt.matshow(cq_frames[5, 3]), plt.colorbar(), plt.show()

    # Write reconstructed example
    # MonoWriter(filename='reconstructed.wav')(y_frames)
	import numpy as np
	import matplotlib.pyplot as plt

	import essentia.pytools.spectral as sp
	from essentia.standard import MonoLoader, MonoWriter
	from essentia import lin2db

	PARAMS = {
	'frameSize': 2 ** 14,
	'sampleRate': 44100,
	'minFrequency': 40,
	'maxFrequency': 15000,
	'binsPerOctave': 24,
	'normalize': 'sine',
	'transitionSize': 256
	}


	def SNR(r, t, skip=8192):
	"""
	r : reference
	t : test
	skip : number of samples to skip from the SNR computation
	"""
	difference = ((r[skip: -skip] - t[skip: -skip]) ** 2).sum()
	return lin2db((r[skip: -skip] ** 2).sum() / difference)


	def cqt(audio):
	"""Forward transform. Takes audio at the sample rate specified in PARAMS.
	Returns a 4D vector (n_frames, channels, frame_size, n_bins)
	- n_frames depends on the audio length.
	- there are 4 channels: real part, imag part, real part sign, imag part sign.
	- frame_size: duration of each frame in samples. e.g., 2 ** 14 / 44100 = 0.38s
	- n_bins: number of frequency bins. Can be controled with binsPerOctave, min
	and max frequencies.
	"""

	cq_frames, dc_frames, nb_frames = sp.nsgcqgram(x, **PARAMS)
	n_frames = len(cq_frames)

	stack = np.empty([n_frames, 4, cq_frames[0].shape[0], cq_frames[0].shape[1]])
	for f in range(n_frames):
	stack[f, 0] = np.log10(10000. * abs(cq_frames[f].real) + 1.) # ch.0
	stack[f, 1] = np.log10(10000. * abs(cq_frames[f].imag) + 1.) # ch.1
	stack[f, 2] = np.array(cq_frames[f].real < 0, dtype=int) # ch.2
	stack[f, 3] = np.array(cq_frames[f].imag < 0, dtype=int) # ch.3

	return stack, dc_frames, nb_frames


	def icqt(cq_frames, dc_frames=None, nb_frames=None):
	""" Backward transform. Takes cqt frames and returns audio.
	Optionally DC and Nyquist bands that are outside the analysis range
	can be added.
	"""
	stack = []
	nbins = cq_frames.shape[1] // 2

	stack = []
	for f in range(cq_frames.shape[0]):
	iscqt = cq_frames[f]

	iscqt_real = (10 ** iscqt[0] - 1.) / 10000.
	iscqt_real[np.where(iscqt[2])] *= -1

	iscqt_imag = (10 ** iscqt[1] - 1.) / 10000.
	iscqt_imag[np.where(iscqt[3])] *= -1

	stack.append(np.array(iscqt_real + 1j * iscqt_imag, dtype='complex64'))

	if not dc_frames:
	dc_frames = [np.zeros(30).astype('complex64')] * cq_frames.shape[0]

	if not nb_frames:
	nb_frames = [np.zeros(5308).astype('complex64')] * cq_frames.shape[0]

	y_frames = sp.nsgicqgram(stack, dc_frames, nb_frames, **PARAMS)

	return y_frames


	if __name__ == "__main__":
	x = MonoLoader(filename='/home/pablo/vignesh.wav')()

	cq_frames, dc_frames, nb_frames = cqt(x)

	y_frames = icqt(cq_frames, dc_frames, nb_frames)

	cq_snr = SNR(x, y_frames[:x.size])
	print('Reconstruction w out of range data SNR: {:.3f} dB'.format(cq_snr))

	y_frames = icqt(cq_frames)
	cq_snr = SNR(x, y_frames[:x.size])
	print('Reconstruction w/o out of range data SNR: {:.3f} dB'.format(cq_snr))


	plt.matshow(cq_frames[5, 1]), plt.colorbar(), plt.show()
	plt.matshow(cq_frames[5, 3]), plt.colorbar(), plt.show()

	# Write reconstructed example
	# MonoWriter(filename='reconstructed.wav')(y_frames)