Skip to content

Instantly share code, notes, and snippets.

@andres-fr
Created May 13, 2022 13:04
Show Gist options
  • Save andres-fr/2b7515ca8c10d4dee27583fa301971cb to your computer and use it in GitHub Desktop.
Save andres-fr/2b7515ca8c10d4dee27583fa301971cb to your computer and use it in GitHub Desktop.
Flexible and efficient conversion from mono WAV to log-mel spectrogram
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
"""
import librosa
import numpy as np
# ##############################################################################
# # AUDIO
# ##############################################################################
class WavToLogmel:
    """
    Callable that converts an audio waveform into a log-mel spectrogram.

    The mel filterbank is computed once in the constructor and reused on
    every call, so one instance can process many signals that share the same
    samplerate and STFT settings.
    """

    def __init__(self, samplerate, winsize, hopsize, n_mels,
                 mel_fmin=50, mel_fmax=8_000, stft_window="hann"):
        """
        :param samplerate: Expected audio input samplerate.
        :param winsize: Window size for the STFT (also used as ``n_fft`` for
          the mel filterbank).
        :param hopsize: Hop size for the STFT.
        :param n_mels: Number of mel bins.
        :param mel_fmin: Lowest mel bin, in Hz.
        :param mel_fmax: Highest mel bin, in Hz.
        :param stft_window: Windowing function for the STFT.
        """
        self.winsize = winsize
        self.hopsize = hopsize
        self.stft_window = stft_window
        # Precompute the filterbank once; it depends only on constructor
        # arguments, never on the audio passed to __call__.
        self.mel_filt = librosa.filters.mel(sr=samplerate,
                                            n_fft=winsize, n_mels=n_mels,
                                            fmin=mel_fmin, fmax=mel_fmax)

    def __call__(self, wav_arr):
        """
        :param wav_arr: 1D audio array (float). Arrays with leading channel
          dimensions are also handled, provided the installed
          ``librosa.stft`` accepts them.
        :returns: log-mel spectrogram of shape ``(n_mels, t)`` for 1D input
          (``(..., n_mels, t)`` for input with leading dimensions). Values
          are ``librosa.power_to_db`` of the mel power spectrogram with
          ``ref=1.0``, ``amin=1e-10`` and ``top_db=None``.
        """
        # Power spectrogram: squared magnitude of the centered,
        # reflect-padded STFT.
        power_spec = np.abs(librosa.stft(y=wav_arr,
                                         n_fft=self.winsize,
                                         hop_length=self.hopsize,
                                         center=True,
                                         window=self.stft_window,
                                         pad_mode="reflect")) ** 2
        # matmul equals dot for a 2D spectrogram, but broadcasts over any
        # leading dimensions instead of moving the mel axis to the front.
        mel_spec = np.matmul(self.mel_filt, power_spec)
        logmel_spec = librosa.power_to_db(mel_spec, ref=1.0, amin=1e-10,
                                          top_db=None)
        return logmel_spec  # (nmels, t)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment