Skip to content

Instantly share code, notes, and snippets.

@trueroad
Last active April 9, 2023 06:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trueroad/c5861963278d97bd73e032f5afaad987 to your computer and use it in GitHub Desktop.
Save trueroad/c5861963278d97bd73e032f5afaad987 to your computer and use it in GitHub Desktop.
Test program for librosa CQT algorithm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test program for librosa CQT algorithm.
https://gist.github.com/trueroad/c5861963278d97bd73e032f5afaad987
Copyright (C) 2023 Masamichi Hosoda.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
"""
# The NPZ saved by this tool is similar in format to the NPZ generated by
# Experiment chromatic DFT algorithm (chromatic_dft.py and test_cdft.py)
# https://gist.github.com/trueroad/296dd6f397eb1ed65c1b04bc6a68c201
# and can be used by
# Experiment frequency component vectors DTW (fcvs_dtw.py)
# https://gist.github.com/trueroad/a5fee9ee0e447b78761dfdb336bbed7e
# and
# Frequency component vectors and SMF (fcvs_smf.py)
# https://gist.github.com/trueroad/216cb39902bccd776d8e9da670460fa3
# See
# https://www.wizard-notes.com/entry/python/librosa-cqt
# http://makotomurakami.com/blog/2020/06/08/5633/
# https://heartstat.net/2021/05/14/python_constant-q-transform/
import argparse
import os
from pathlib import Path
import sys
from typing import Any, Final, Union
import numpy as np
import numpy.typing as npt
import librosa
def generate_npz_by_cqt(filename_wav: Union[str, bytes, os.PathLike[Any]],
                        filename_npz: Union[str, os.PathLike[Any]],
                        shift_time: float = 0.0,
                        b_trim_leading: bool = False,
                        b_trim_trailing: bool = False,
                        top_db: int = 25,
                        sliding_size: float = 0.05,
                        filter_scale: float = 2.0
                        ) -> None:
    """
    Generate NPZ by CQT.

    Load a WAV file (mono, loaded with ``sr=None``), optionally trim
    silence and shift the waveform, compute the constant-Q transform
    with librosa and save the squared magnitude of the result to a
    compressed NPZ file under the key ``chroma_vectors``.

    Args:
      filename_wav (PathLike): Input WAV filename
      filename_npz (PathLike): Output NPZ filename
      shift_time (float): Shift time in seconds (positive: add beginning
        silence, negative: cut beginning waveform, default=0.0,
        i.e. 0.0 [s]). It is applied after trimming.
      b_trim_leading (bool): Trim leading silence
      b_trim_trailing (bool): Trim trailing silence
      top_db (int): Trim's threshold in dB
        The smaller the value, the longer the length to trim
        and the shorter the length to remain.
      sliding_size (float): Sliding size (CQT hop length) in seconds
        (default=0.05, i.e. 50 [ms])
      filter_scale (float): CQT filter scale

    Raises:
      ValueError: If sliding_size is not positive, or if it is shorter
        than one sample at the sampling rate of the input file.
    """
    fmin_str: Final[str] = 'C1'
    octaves: Final[int] = 8
    bins_per_octave: Final[int] = 12
    # Alternative window: 'hann'
    window: Final[str] = 'blackman'

    # Reject an unusable hop before doing any expensive file I/O.
    if sliding_size <= 0.0:
        raise ValueError(
            f'sliding_size must be positive, got {sliding_size!r}')

    data: npt.NDArray[Any]
    sampling_rate: float
    data, sampling_rate = librosa.load(filename_wav, sr=None, mono=True)

    if b_trim_leading or b_trim_trailing:
        # Only the detected non-silent interval is used; each end is cut
        # independently so one side can be trimmed without the other.
        index: npt.NDArray[Any]
        _, index = librosa.effects.trim(data, top_db=top_db)
        start = index[0] if b_trim_leading else None
        stop = index[1] if b_trim_trailing else None
        data = data[start:stop]

    if shift_time > 0.0:
        # Delay the waveform by prepending silence.
        silence: npt.NDArray[Any] \
            = np.zeros(int(shift_time * sampling_rate), dtype=data.dtype)
        data = np.concatenate([silence, data])
    elif shift_time < 0.0:
        # Advance the waveform by dropping its beginning.
        data = data[int(-shift_time * sampling_rate):]

    hop_length: int = int(sampling_rate * sliding_size)
    if hop_length < 1:
        raise ValueError(
            f'sliding_size {sliding_size!r} [s] is shorter than one sample '
            f'at {sampling_rate} [Hz]')

    cqt: npt.NDArray[np.complex64]
    cqt = librosa.cqt(data,
                      sr=sampling_rate,
                      hop_length=hop_length,
                      fmin=librosa.note_to_hz(fmin_str),
                      n_bins=octaves * bins_per_octave,
                      bins_per_octave=bins_per_octave,
                      filter_scale=filter_scale,
                      window=window)

    power: npt.NDArray[np.float64]
    power = np.abs(cqt, dtype=np.float64) ** 2.0

    # Swap the two axes of the CQT result before saving.  This is exactly
    # what np.fliplr(np.rot90(power, -1)) produces.
    vectors: npt.NDArray[np.float64] = power.T

    np.savez_compressed(filename_npz, chroma_vectors=vectors)
def main() -> None:
    """
    Test main.

    Parse the command line, print the effective settings and convert
    every ``*.wav`` file found recursively under DIR into an ``.npz``
    file with the same stem, placed next to the WAV file.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('DIR', help='Input WAV/Output NPZ directory '
                        '(recursive)')
    parser.add_argument('--trim-leading',
                        help='Trim leading silence',
                        action='store_true')
    parser.add_argument('--trim-trailing',
                        help='Trim trailing silence',
                        action='store_true')
    parser.add_argument('--trim-top-db',
                        help="Trim's threshold in dB "
                        '(default=25)',
                        type=int, default=25)
    parser.add_argument('--shift-time',
                        help='Shift time (positive: add beginning silence, '
                        'negative: cut beginning waveform, '
                        'default=0.0, i.e. 0.0 [s])',
                        type=float, default=0.0)
    parser.add_argument('--sliding-size',
                        help='Sliding size (CQT hop length) in second '
                        '(default=0.05, i.e. 50 [ms])',
                        type=float, default=0.05)
    parser.add_argument('--filter-scale',
                        help='CQT filter scale '
                        '(default=2.0)',
                        type=float, default=2.0)

    args = parser.parse_args()

    dirname: str = args.DIR
    b_trim_leading: bool = args.trim_leading
    b_trim_trailing: bool = args.trim_trailing
    trim_top_db: int = args.trim_top_db
    shift_time: float = args.shift_time
    sliding_size_time: float = args.sliding_size
    filter_scale: float = args.filter_scale

    print(f'Directory : {dirname}\n'
          f'Trim leading : {b_trim_leading}\n'
          f'Trim trailing: {b_trim_trailing}\n'
          f'Trim top db : {trim_top_db} [dB]\n'
          f'Shift time : {shift_time} [s]\n'
          f'Sliding Size : {sliding_size_time} [s]\n'
          f'Filter scale : {filter_scale}\n')

    path_wav: Path
    for path_wav in Path(dirname).glob('**/*.wav'):
        path_npz: Path = path_wav.with_suffix('.npz')
        print(f'{path_wav}\n'
              f' -> {path_npz}')
        # top_db must be forwarded explicitly; otherwise --trim-top-db is
        # silently ignored and the callee's default threshold is used.
        generate_npz_by_cqt(filename_wav=path_wav,
                            filename_npz=path_npz,
                            b_trim_leading=b_trim_leading,
                            b_trim_trailing=b_trim_trailing,
                            top_db=trim_top_db,
                            shift_time=shift_time,
                            sliding_size=sliding_size_time,
                            filter_scale=filter_scale)


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test NPZ to PNG.
https://gist.github.com/trueroad/c5861963278d97bd73e032f5afaad987
Copyright (C) 2023 Masamichi Hosoda.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
"""
# This program generates a PNG from the NPZ generated by test_cqt.py.
# The PNG is similar to the one generated by
# Experiment chromatic DFT algorithm (chromatic_dft.py and test_cdft.py)
# https://gist.github.com/trueroad/296dd6f397eb1ed65c1b04bc6a68c201
import argparse
import sys
from typing import Any
from PIL import Image
import numpy as np
import numpy.typing as npt
def _vectors_to_gray(vectors: npt.NDArray[np.float64],
                     gamma: float) -> npt.NDArray[np.uint8]:
    """Normalize by the maximum, apply the encoding gamma, quantize to 8 bit.

    An input whose maximum is not positive (e.g. complete silence) is
    left unscaled instead of being divided by zero.
    """
    peak = vectors.max()
    scale = peak if peak > 0.0 else 1.0
    return ((vectors / scale) ** gamma
            * 255).astype(np.uint8)


def main() -> None:
    """
    Test main.

    Read the ``chroma_vectors`` array from the input NPZ, convert it to
    an 8-bit grayscale image, rotate it by 90 degrees, enlarge it by the
    requested integer factors with nearest-neighbor resampling and save
    it as PNG.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('INPUT.npz', help='Input NPZ filename')
    parser.add_argument('OUTPUT.png', help='Output PNG filename')
    parser.add_argument('--gamma',
                        help='Encoding gamma for PNG (default=0.45)',
                        type=float, default=0.45)
    parser.add_argument('--vscale',
                        help='V scaling factor for PNG (default=4)',
                        type=int, default=4)
    parser.add_argument('--hscale',
                        help='H scaling factor for PNG (default=1)',
                        type=int, default=1)

    args = parser.parse_args()
    # The positional names contain a dot, so they are not reachable as
    # plain attributes and are looked up through the namespace dict.
    vargs = vars(args)

    filename_npz: str = vargs['INPUT.npz']
    filename_png: str = vargs['OUTPUT.png']
    gamma: float = args.gamma
    vscale: int = args.vscale
    hscale: int = args.hscale

    # A zero or negative factor would give a non-positive image size.
    if vscale < 1 or hscale < 1:
        parser.error('--vscale and --hscale must be positive integers')

    print(f'Input NPZ filename : {filename_npz}\n'
          f'Output PNG filename: {filename_png}\n'
          f'Encoding gamma : {gamma}\n'
          f'V scale : {vscale}\n'
          f'H scale : {hscale}\n')

    # Use the NpzFile as a context manager so the archive is closed.
    vectors: npt.NDArray[np.float64]
    with np.load(filename_npz) as loaded:
        vectors = loaded['chroma_vectors']

    cvs_uint: npt.NDArray[np.uint8] = _vectors_to_gray(vectors, gamma)

    pil_img = Image.fromarray(np.rot90(cvs_uint))
    x, y = pil_img.size
    pil_img.resize(size=(x * hscale, y * vscale),
                   resample=Image.NEAREST).save(filename_png)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment