# keunwoochoi/equal_loudness.py

## equal_loudness.py

from scipy.signal import firls
import numpy as np
import torch
import torch.nn.functional as F

# Equal-loudness contour samples, one (frequency, response) row per point.
# equal_filter() uses column 0 as the frequency axis (same unit as the
# sampling rate, i.e. Hz) and converts column 1 from dB to a linear gain.
elc = np.array(
    [
        (31.5, -29.9),
        (63.0, -23.9),
        (100.0, -19.8),
        (200.0, -13.8),
        (400.0, -7.8),
        (800.0, -1.9),
        (1000.0, 0.0),
        (2000.0, 5.6),
        (3150.0, 9.0),
        (4000.0, 10.5),
        (5000.0, 11.7),
        (6300.0, 12.2),
        (7100.0, 12.0),
        (8000.0, 11.4),
        (9000.0, 10.1),
        (10000.0, 8.1),
        (12500.0, 0.0),
        (14000.0, -5.3),
        (16000.0, -11.7),
        (20000.0, -22.2),
        (31500.0, -42.7),
    ],
    dtype=np.float64,
)

def equal_filter(n_tap, sr):
    """Design a linear-phase FIR filter that approximates the equal loudness contour.

    The rows of the module-level ``elc`` table whose frequency is at or below
    the Nyquist frequency (``sr / 2``) are converted from dB to linear gain and
    joined into contiguous piecewise-linear bands, which ``scipy.signal.firls``
    then fits in the least-squares sense. With the values in ``elc`` the result
    attenuates low frequencies and boosts roughly the 2-10 kHz region.

    Args:
        n_tap: number of filter taps; must be odd.
        sr: sampling rate in Hz.

    Returns:
        1-D numpy array holding the ``n_tap`` filter coefficients.

    Raises:
        AssertionError: if ``n_tap`` is even.
        ValueError: if fewer than two contour points lie at or below ``sr / 2``,
            so no band can be formed.
    """
    assert n_tap % 2 == 1, "num tap should be an odd number otherwise it's odd.."

    # Use the exact Nyquist frequency; flooring with `//` could drop a contour
    # point that sits between floor(sr / 2) and sr / 2.
    nyquist = sr / 2.0
    usable = elc[elc[:, 0] <= nyquist]
    if len(usable) < 2:
        raise ValueError(
            "sr={} is too low: need at least two contour points at or below "
            "the Nyquist frequency".format(sr)
        )

    freq = usable[:, 0]
    desired = 10 ** (usable[:, 1] / 20.)  # dB -> linear amplitude

    # firls interprets `bands` as (start, stop) pairs, not as single points.
    # Passing the contour points directly fails when their count is odd and
    # otherwise leaves every second segment unconstrained. Repeating the inner
    # points yields contiguous bands (f0, f1), (f1, f2), ... that cover the
    # whole contour for any number of points.
    band_edges = np.repeat(freq, 2)[1:-1]
    band_gains = np.repeat(desired, 2)[1:-1]
    return firls(n_tap, band_edges, band_gains, fs=sr)


if __name__ == '__main__':
    # Demo: apply the equal-loudness FIR filter to an audio file with F.conv1d.
    # librosa is only needed for this demo, so it is imported here rather than
    # at module import time.
    import librosa

    SR = 44100
    len_filter = 9

    # librosa.load returns a (samples, sampling_rate) tuple; only the samples
    # can be handed to torch.from_numpy.
    waveform, _ = librosa.load('some_audio_file.wav', sr=SR, mono=True)
    audio_signal = torch.from_numpy(waveform)

    # get the filter taps
    elc_filter = torch.from_numpy(equal_filter(n_tap=len_filter, sr=SR))

    # F.conv1d needs input and weight to share a dtype; the designed taps are
    # double precision while the loaded audio usually is not.
    elc_filter = elc_filter.to(audio_signal.dtype)

    # flip it to use torch's conv1d function, which does NOT flip the kernel.
    elc_filter = torch.flip(elc_filter, dims=(0,))

    # make it 3d for F.conv1d compatibility
    elc_filter = torch.reshape(elc_filter, (1, 1, -1))  # (out_ch, in_ch, filter_length)

    # make input batch for F.conv1d compatibility
    batch_audio = torch.reshape(audio_signal, (1, 1, -1))  # now (1, 1, time), ready for F.conv1d

    # padding of len_filter // 2 on both sides keeps the output as long as the input
    perceptual_simulated_batch_audio = F.conv1d(batch_audio, elc_filter, padding=len_filter // 2)

	from scipy.signal import firls
	import numpy as np
	import torch
	import torch.nn.functional as F

	# Equal-loudness contour samples, one (frequency, response) row per point.
	# equal_filter() uses column 0 as the frequency axis (same unit as the
	# sampling rate, i.e. Hz) and converts column 1 from dB to a linear gain.
	elc = np.array(
		[
			(31.5, -29.9),
			(63.0, -23.9),
			(100.0, -19.8),
			(200.0, -13.8),
			(400.0, -7.8),
			(800.0, -1.9),
			(1000.0, 0.0),
			(2000.0, 5.6),
			(3150.0, 9.0),
			(4000.0, 10.5),
			(5000.0, 11.7),
			(6300.0, 12.2),
			(7100.0, 12.0),
			(8000.0, 11.4),
			(9000.0, 10.1),
			(10000.0, 8.1),
			(12500.0, 0.0),
			(14000.0, -5.3),
			(16000.0, -11.7),
			(20000.0, -22.2),
			(31500.0, -42.7),
		],
		dtype=np.float64,
	)

	def equal_filter(n_tap, sr):
		"""Design a linear-phase FIR filter that approximates the equal loudness contour.

		The rows of the ``elc`` table whose frequency is at or below the Nyquist
		frequency (``sr / 2``) are converted from dB to linear gain and joined
		into contiguous piecewise-linear bands, which ``scipy.signal.firls``
		then fits in the least-squares sense. With the values in ``elc`` the
		result attenuates low frequencies and boosts roughly the 2-10 kHz region.

		Args:
			n_tap: number of filter taps; must be odd.
			sr: sampling rate in Hz.

		Returns:
			1-D numpy array holding the ``n_tap`` filter coefficients.

		Raises:
			AssertionError: if ``n_tap`` is even.
			ValueError: if fewer than two contour points lie at or below
				``sr / 2``, so no band can be formed.
		"""
		assert n_tap % 2 == 1, "num tap should be an odd number otherwise it's odd.."

		# Use the exact Nyquist frequency; flooring with `//` could drop a
		# contour point that sits between floor(sr / 2) and sr / 2.
		nyquist = sr / 2.0
		usable = elc[elc[:, 0] <= nyquist]
		if len(usable) < 2:
			raise ValueError(
				"sr={} is too low: need at least two contour points at or below "
				"the Nyquist frequency".format(sr)
			)

		freq = usable[:, 0]
		desired = 10 ** (usable[:, 1] / 20.)  # dB -> linear amplitude

		# firls interprets `bands` as (start, stop) pairs, not as single points.
		# Passing the contour points directly fails when their count is odd and
		# otherwise leaves every second segment unconstrained. Repeating the
		# inner points yields contiguous bands (f0, f1), (f1, f2), ... that
		# cover the whole contour for any number of points.
		band_edges = np.repeat(freq, 2)[1:-1]
		band_gains = np.repeat(desired, 2)[1:-1]
		return firls(n_tap, band_edges, band_gains, fs=sr)


	if __name__ == '__main__':
		# Demo: apply the equal-loudness FIR filter to an audio file with F.conv1d.
		# librosa is only needed for this demo, so it is imported here rather
		# than at module import time.
		import librosa

		SR = 44100
		len_filter = 9

		# librosa.load returns a (samples, sampling_rate) tuple; only the
		# samples can be handed to torch.from_numpy.
		waveform, _ = librosa.load('some_audio_file.wav', sr=SR, mono=True)
		audio_signal = torch.from_numpy(waveform)

		# get the filter taps
		elc_filter = torch.from_numpy(equal_filter(n_tap=len_filter, sr=SR))

		# F.conv1d needs input and weight to share a dtype; the designed taps
		# are double precision while the loaded audio usually is not.
		elc_filter = elc_filter.to(audio_signal.dtype)

		# flip it to use torch's conv1d function, which does NOT flip the kernel.
		elc_filter = torch.flip(elc_filter, dims=(0,))

		# make it 3d for F.conv1d compatibility
		elc_filter = torch.reshape(elc_filter, (1, 1, -1))  # (out_ch, in_ch, filter_length)

		# make input batch for F.conv1d compatibility
		batch_audio = torch.reshape(audio_signal, (1, 1, -1))  # now (1, 1, time), ready for F.conv1d

		# padding of len_filter // 2 on both sides keeps the output as long as the input
		perceptual_simulated_batch_audio = F.conv1d(batch_audio, elc_filter, padding=len_filter // 2)