Skip to content

Instantly share code, notes, and snippets.

@zepadovani
Last active November 15, 2023 22:33
Show Gist options
  • Save zepadovani/3b4eb0c5575882a58889d186bead578d to your computer and use it in GitHub Desktop.
Save zepadovani/3b4eb0c5575882a58889d186bead578d to your computer and use it in GitHub Desktop.
librosa_normalization_factor_calculation.py
import numpy as np
import scipy.signal.windows as sciwins
import librosa
import matplotlib.pyplot as plt
## The idea of this script is to calculate the normalization factor for a given window function
## The normalization factor is the reciprocal of the sum of the values of the chosen frame
## The normalization factor is first calculated for a very short sinusoidal signal, with the
## frequency set in the middle of the spectrum and amplitude set to 1
## From this, the normalization factor is calculated for a given window function and is then applied
## It is important to note that the normalization factor is calculated for analysis and spectral processing
## purposes, and is not the same as the normalization factor used for synthesis purposes as librosa.isftf
## has its own normalization factor built in.
## Also important to note that the normalization factor is calculated for a specific frame, and is not dealing
## with consecutive/overlapping frames.
## this works well for hanning, but other windows are not normalized
## have also tried just 2/np.sum(window), but it is not right for some types of windows
# def normfactor(window):
# N = len(window)
# norm_factor = (1 /(np.sum(window) / (N-1)))
# return norm_factor
def calculate_normalization_factor(window, sr=48000, n_fft=2048, normtype='max', reciprocal=False):
"""
Calculates the normalization factor for a given window function.
Args:
window (string or tuple): The window function to use for the STFT.
sr (int): The sampling rate of the signal.
n_fft (int): The number of FFT points to use for the STFT.
normtype (string): The type of normalization to use. Can be 'max' or 'sum'.
If 'max', the normalization factor is the reciprocal of the maximum value of the chosen frame.
If 'sum', the normalization factor is the reciprocal of the sum of the values of the chosen frame.
reciprocal (bool): Whether to return the reciprocal of the normalization factor.
Returns:
float: The normalization factor.
"""
# Generate a test sinusoidal signal/
dt = 1/sr
t = np.arange(0, dt*n_fft*5, dt)
frequency = librosa.fft_frequencies(sr=sr, n_fft=n_fft)[int(n_fft/4)]
signal = 1 * np.cos(2 * np.pi * frequency * t) ## normalizing according to a sine, amp1, in the first bin
# Calculate STFT and magnitudes
stft_matrix = librosa.stft(signal, n_fft=n_fft, hop_length=n_fft, window=window)
magnitudes = np.abs(stft_matrix)
# Choose a frame for normalization
frame_to_get_normfact = magnitudes[:, 2]
# Calculate the normalization factor
if(normtype == 'max'):
normalization_factor = 1 / np.max(frame_to_get_normfact)
elif(normtype == 'sum'):
normalization_factor = 1 / np.sum(frame_to_get_normfact)
if(reciprocal):
normalization_factor = 1 / normalization_factor
return normalization_factor
def generate_test_sinusoid(frequency, sr=48000, amp=1, phase=0, duration=3):
"""
Generate a sinusoidal waveform with the specified frequency, sampling rate, amplitude, phase, and duration.
Args:
frequency (float): The frequency of the sinusoid in Hz.
sr (int, optional): The sampling rate of the sinusoid in Hz. Defaults to 48000.
amp (float, optional): The amplitude of the sinusoid. Defaults to 1.
phase (float, optional): The phase of the sinusoid in radians. Defaults to 0.
duration (float, optional): The duration of the sinusoid in seconds. Defaults to 3.
Returns:
numpy.ndarray: A 1D numpy array containing the generated sinusoid.
"""
t = np.arange(0, duration, 1/sr)
sinusoid = amp * np.cos(2 * np.pi * frequency * t + phase)
return sinusoid
def raw_magnitude_stft(array, window, n_fft=2048, overlap=4):
"""
Compute the raw magnitude STFT of an audio signal.
Parameters:
-----------
array : np.ndarray
Audio signal to compute the STFT of.
window : str or tuple
Window function to use for the STFT.
n_fft : int, optional
Number of FFT points to use. Defaults to 2048.
overlap : int, optional
Number of points of overlap between consecutive frames. Defaults to 4.
Returns:
--------
magnitudes : np.ndarray
Magnitude STFT of the input signal.
phases : np.ndarray
Phase STFT of the input signal.
"""
hop = int(n_fft / overlap)
stft_matrix = librosa.stft(array, n_fft=n_fft, hop_length=hop, window=window)
magnitudes = np.abs(stft_matrix)
phases = np.angle(stft_matrix)
return magnitudes, phases
# Parameters
sr = 48000
n_fft = 512
overlaps = 2
hop = int(n_fft / overlaps)
# Generate test signal
sinfreqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
sine_frequency = sinfreqs[10]
test_signal = generate_test_sinusoid(sine_frequency, amp=0.65)
# Calculate STFT magnitudes
# win = sciwins.hann(n_fft, sym=False)
win = sciwins.blackmanharris(n_fft, sym=False)
mags, phases = raw_magnitude_stft(test_signal, win, n_fft=n_fft, overlap=overlaps)
# Calculate normalization factor
normfact = calculate_normalization_factor(win, sr=sr, n_fft=n_fft, normtype='sum')
# Apply normalization factor to a specific frame
norm_frame = mags[:, 10] * normfact
# Print and plot results
print("Normalization Factor:", normfact)
print("Sum after normalization:", np.sum(norm_frame))
plt.plot(norm_frame)
plt.title('Normalized Frame')
plt.xlabel('Frequency Bin')
plt.ylabel('Magnitude')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment