Last active October 29, 2023 16:44
def local_peaks(
    log_spectrogram: np.ndarray, amp_min: float, p_nn: int
) -> List[Tuple[int, int]]:
    """
    Defines a local neighborhood and finds the local peaks
    in the spectrogram, which must be larger than the
    specified `amp_min`.

    Parameters
    ----------
    log_spectrogram : numpy.ndarray, shape=(n_freq, n_time)
        Log-scaled spectrogram. Columns are the periodograms of
        successive segments of a frequency-time spectrum.

    amp_min : float
        Amplitude threshold applied to local maxima

    p_nn : int
        The neighborhood radius used for determining if a spectrogram value
        is a local peak. Specified in spectrogram cells.

    Returns
    -------
    List[Tuple[int, int]]
        Time-bin and frequency-bin index-values of the local peaks in the
        spectrogram, one `(time-bin, frequency-bin)` pair per peak.
        Sorted by ascending time-bin, and by ascending frequency-bin
        within each time-bin.

    Notes
    -----
    The local peaks are returned in column-major order for the spectrogram.
    That is, the peaks are ordered by time: we look for nearest neighbors
    of increasing frequencies at the same time, and then move to the next
    time bin.
    """
    # NOTE(review): no function body is visible in this view of the file --
    # confirm the implementation against the full source.
def plot_song(
    song: Union[str, Path, _np.ndarray],
    sampling_rate: int = _defaults.SAMPLING_RATE,
    min_frac_amp_cutoff: float = _defaults.MIN_FRAC_AMP_CUTOFF,
    local_peak_nn_radius: int = _defaults.LOCAL_PEAK_NN_RADIUS,
) -> Tuple[Figure, Axes]:
    """Plot a spectrogram and fingerprint features for a song.

    Parameters
    ----------
    song : Union[str, pathlib.Path, numpy.ndarray]
        The filepath to a song-file, or the digital signal itself.

    sampling_rate : int, optional (default=_defaults.SAMPLING_RATE)
        The target sampling rate used to read in an audio file. Only used
        when `song` is a path; when `song` is an array, the rate is taken
        from `microphone.config.settings.rate` instead.

    min_frac_amp_cutoff : float, optional (default=_defaults.MIN_FRAC_AMP_CUTOFF)
        The fractional portion of intensities for which the cutoff is selected.
        Forwarded to `digital_to_spec` as `frac_cut`. E.g. a value of 0.8 will
        produce a cutoff intensity such that the bottom 80% of intensities
        are excluded.

    local_peak_nn_radius : int, optional (default=_defaults.LOCAL_PEAK_NN_RADIUS)
        The neighborhood radius used for determining if a spectrogram value
        is a local peak. Specified in spectrogram cells.

    Returns
    -------
    Tuple[matplotlib.pyplot.Figure, matplotlib.pyplot.Axes]
        The figure and axes produced by `digital_to_spec`, with the local
        peaks (if any were found) drawn on top as white markers.

    Raises
    ------
    TypeError
        If `song` is neither a path (str / pathlib.Path) nor a numpy array.
    """
    from pathlib import Path

    if isinstance(song, (str, Path)):
        digital, fs = _librosa.load(str(song), sr=sampling_rate, mono=True)
    elif isinstance(song, _np.ndarray):
        # Imported lazily: only the raw-signal path needs the microphone
        # settings, so plotting from a file works without that package.
        from microphone.config import settings

        digital = song
        fs = settings.rate
    else:
        raise TypeError("`song` must be a path to a song or an audio signal array")

    # Get the spectrogram, along with the size of the
    # frequency bins (`df`) and time bins (`dt`).
    S, cut, fig, ax, df, dt = digital_to_spec(
        digital, fs, frac_cut=min_frac_amp_cutoff, plot=True
    )

    # Find the positions of the local peaks in the spectrogram.
    # The locations returned here are column/row (time/frequency) indices.
    peaks = local_peaks(S, cut, p_nn=local_peak_nn_radius)

    # `zip(*peaks)` cannot be unpacked into two names when there are no
    # peaks, so only draw the markers when at least one peak was found.
    if len(peaks) > 0:
        t_loc, f_loc = zip(*peaks)

        # Scale the time-indices by dt and the frequency-indices by df so
        # that the local peaks land in the right place on the spectrogram.
        # NOTE(review): time indices are shifted by a full bin (+1) while
        # frequency indices are shifted by half a bin (+0.5) -- presumably
        # this matches how `digital_to_spec` positions its time bins; confirm.
        times = dt * (_np.array(t_loc) + 1)
        freqs = df * (_np.array(f_loc) + 0.5)  # +0.5 centers peaks in their bins
        ax.scatter(times, freqs, s=4, color="white")

    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Frequency (Hz)")
    return fig, ax
