Last active
April 9, 2023 06:26
-
-
Save trueroad/c5861963278d97bd73e032f5afaad987 to your computer and use it in GitHub Desktop.
Test program for librosa CQT algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Test program for librosa CQT algorithm. | |
https://gist.github.com/trueroad/c5861963278d97bd73e032f5afaad987 | |
Copyright (C) 2023 Masamichi Hosoda. | |
All rights reserved. | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions | |
are met: | |
* Redistributions of source code must retain the above copyright notice, | |
this list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright notice, | |
this list of conditions and the following disclaimer in the documentation | |
and/or other materials provided with the distribution. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
ARE DISCLAIMED. | |
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
SUCH DAMAGE. | |
""" | |
# The NPZ saved by this tool is similar in format to the NPZ generated by | |
# Experiment chromatic DFT algorithm (chromatic_dft.py and test_cdft.py) | |
# https://gist.github.com/trueroad/296dd6f397eb1ed65c1b04bc6a68c201 | |
# and can be used by | |
# Experiment frequency component vectors DTW (fcvs_dtw.py) | |
# https://gist.github.com/trueroad/a5fee9ee0e447b78761dfdb336bbed7e | |
# and | |
# Frequency component vectors and SMF (fcvs_smf.py) | |
# https://gist.github.com/trueroad/216cb39902bccd776d8e9da670460fa3 | |
# See | |
# https://www.wizard-notes.com/entry/python/librosa-cqt | |
# http://makotomurakami.com/blog/2020/06/08/5633/ | |
# https://heartstat.net/2021/05/14/python_constant-q-transform/ | |
import argparse | |
import os | |
from pathlib import Path | |
import sys | |
from typing import Any, Final, Union | |
import numpy as np | |
import numpy.typing as npt | |
import librosa | |
def generate_npz_by_cqt(filename_wav: Union[str, bytes, os.PathLike[Any]],
                        filename_npz: Union[str, os.PathLike[Any]],
                        shift_time: float = 0.0,
                        b_trim_leading: bool = False,
                        b_trim_trailing: bool = False,
                        top_db: int = 25,
                        sliding_size: float = 0.05,
                        filter_scale: float = 2.0
                        ) -> None:
    """
    Generate NPZ by CQT.

    Load a WAV file as mono at its native sampling rate, optionally trim
    silence and shift the waveform in time, compute the constant-Q
    transform power and save it compressed under the key 'chroma_vectors'.

    Args:
      filename_wav (PathLike): Input WAV filename
      filename_npz (PathLike): Output NPZ filename
      shift_time (float): Shift time in seconds
        (positive: add beginning silence,
        negative: cut beginning waveform, default=0.0, i.e. 0.0 [s])
      b_trim_leading (bool): Trim leading silence
      b_trim_trailing (bool): Trim trailing silence
      top_db (int): Trim's threshold in dB
        The smaller the value, the longer the length to trim
        and the shorter the length to remain.
      sliding_size (float): Sliding size (CQT hop length) in seconds
        (default=0.05, i.e. 50 [ms])
      filter_scale (float): CQT filter scale

    Raises:
      ValueError: If sliding_size is shorter than one sample
        at the file's sampling rate.
    """
    # Analysis range: 8 octaves of semitone bins starting at C1.
    fmin_str: Final[str] = 'C1'
    octaves: Final[int] = 8
    bins_per_octave: Final[int] = 12
    window: Final[str] = 'blackman'

    data: npt.NDArray[Any]
    sampling_rate: float
    # sr=None keeps the file's native sampling rate (no resampling).
    data, sampling_rate = librosa.load(filename_wav, sr=None, mono=True)

    # Reject a hop that truncates to zero samples here, with a clear
    # message, rather than letting it fail deep inside librosa.
    hop_length: int = int(sampling_rate * sliding_size)
    if hop_length < 1:
        raise ValueError(
            f'sliding_size={sliding_size} [s] is shorter than one sample '
            f'at sampling rate {sampling_rate} [Hz]')

    if b_trim_leading or b_trim_trailing:
        index: npt.NDArray[Any]
        # index holds the [start, end) sample range of the non-silent part.
        _, index = librosa.effects.trim(data, top_db=top_db)
        start = index[0] if b_trim_leading else None
        stop = index[1] if b_trim_trailing else None
        data = data[start:stop]

    if shift_time > 0.0:
        # Delay the waveform by prepending silence.
        silence: npt.NDArray[Any] \
            = np.zeros(int(shift_time * sampling_rate), dtype=data.dtype)
        data = np.concatenate([silence, data])
    elif shift_time < 0.0:
        # Advance the waveform by dropping its beginning.
        data = data[int(-shift_time * sampling_rate):]

    cqt: npt.NDArray[np.complex64]
    cqt = librosa.cqt(data,
                      sr=sampling_rate,
                      hop_length=hop_length,
                      fmin=librosa.note_to_hz(fmin_str),
                      n_bins=octaves * bins_per_octave,
                      bins_per_octave=bins_per_octave,
                      filter_scale=filter_scale,
                      window=window)

    # Power = squared magnitude, computed in double precision.
    power: npt.NDArray[np.float64]
    power = np.abs(cqt, dtype=np.float64) ** 2.0

    # A clockwise rotation followed by a left-right flip is a transpose,
    # so each row of the saved array is one CQT column (one time frame).
    vectors: npt.NDArray[np.float64] = np.fliplr(np.rot90(power, -1))

    np.savez_compressed(filename_npz, chroma_vectors=vectors)
def main() -> None:
    """
    Test main.

    Parse the command line, print the effective settings, then convert
    every '*.wav' file found recursively under DIR into an NPZ file with
    the same name and a '.npz' suffix.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('DIR', help='Input WAV/Output NPZ directory '
                        '(recursive)')
    parser.add_argument('--trim-leading',
                        help='Trim leading silence',
                        action='store_true')
    parser.add_argument('--trim-trailing',
                        help='Trim trailing silence',
                        action='store_true')
    parser.add_argument('--trim-top-db',
                        help="Trim's threshold in dB "
                        '(default=25)',
                        type=int, default=25)
    parser.add_argument('--shift-time',
                        help='Shift time (positive: add beginning silence, '
                        'negative: cut beginning waveform, '
                        'default=0.0, i.e. 0.0 [s])',
                        type=float, default=0.0)
    parser.add_argument('--sliding-size',
                        help='Sliding size (CQT hop length) in second '
                        '(default=0.05, i.e. 50 [ms])',
                        type=float, default=0.05)
    parser.add_argument('--filter-scale',
                        help='CQT filter scale '
                        '(default=2.0)',
                        type=float, default=2.0)

    args = parser.parse_args()

    dirname: str = args.DIR
    b_trim_leading: bool = args.trim_leading
    b_trim_trailing: bool = args.trim_trailing
    trim_top_db: int = args.trim_top_db
    shift_time: float = args.shift_time
    sliding_size_time: float = args.sliding_size
    filter_scale: float = args.filter_scale

    print(f'Directory    : {dirname}\n'
          f'Trim leading : {b_trim_leading}\n'
          f'Trim trailing: {b_trim_trailing}\n'
          f'Trim top db  : {trim_top_db} [dB]\n'
          f'Shift time   : {shift_time} [s]\n'
          f'Sliding Size : {sliding_size_time} [s]\n'
          f'Filter scale : {filter_scale}\n')

    path_wav: Path
    for path_wav in Path(dirname).glob('**/*.wav'):
        # The output sits next to the input, differing only in suffix.
        path_npz: Path = path_wav.with_suffix('.npz')
        print(f'{path_wav}\n'
              f'  -> {path_npz}')
        # top_db must be forwarded explicitly; otherwise --trim-top-db is
        # parsed and printed but the callee silently uses its own default.
        generate_npz_by_cqt(filename_wav=path_wav,
                            filename_npz=path_npz,
                            b_trim_leading=b_trim_leading,
                            b_trim_trailing=b_trim_trailing,
                            top_db=trim_top_db,
                            shift_time=shift_time,
                            sliding_size=sliding_size_time,
                            filter_scale=filter_scale)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Test NPZ to PNG. | |
https://gist.github.com/trueroad/c5861963278d97bd73e032f5afaad987 | |
Copyright (C) 2023 Masamichi Hosoda. | |
All rights reserved. | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions | |
are met: | |
* Redistributions of source code must retain the above copyright notice, | |
this list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright notice, | |
this list of conditions and the following disclaimer in the documentation | |
and/or other materials provided with the distribution. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
ARE DISCLAIMED. | |
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
SUCH DAMAGE. | |
""" | |
# This program generates a PNG from the NPZ generated by test_cqt.py.
# The PNG is similar to the one generated by | |
# Experiment chromatic DFT algorithm (chromatic_dft.py and test_cdft.py) | |
# https://gist.github.com/trueroad/296dd6f397eb1ed65c1b04bc6a68c201 | |
import argparse | |
import sys | |
from typing import Any | |
from PIL import Image | |
import numpy as np | |
import numpy.typing as npt | |
def main() -> None:
    """
    Test main.

    Load the 'chroma_vectors' array from the input NPZ, normalize it by
    its maximum, apply the encoding gamma, quantize to 8 bits and save it
    as a PNG enlarged by the given integer scaling factors.
    """
    parser: argparse.ArgumentParser = argparse.ArgumentParser()
    parser.add_argument('INPUT.npz', help='Input NPZ filename')
    parser.add_argument('OUTPUT.png', help='Output PNG filename')
    parser.add_argument('--gamma',
                        help='Encoding gamma for PNG (default=0.45)',
                        type=float, default=0.45)
    parser.add_argument('--vscale',
                        help='V scaling factor for PNG (default=4)',
                        type=int, default=4)
    parser.add_argument('--hscale',
                        help='H scaling factor for PNG (default=1)',
                        type=int, default=1)

    args = parser.parse_args()
    # The positional names contain a dot, so they cannot be read as
    # attributes; go through the namespace dictionary instead.
    vargs = vars(args)

    filename_npz: str = vargs['INPUT.npz']
    filename_png: str = vargs['OUTPUT.png']
    gamma: float = args.gamma
    vscale: int = args.vscale
    hscale: int = args.hscale

    print(f'Input NPZ filename : {filename_npz}\n'
          f'Output PNG filename: {filename_png}\n'
          f'Encoding gamma     : {gamma}\n'
          f'V scale            : {vscale}\n'
          f'H scale            : {hscale}\n')

    vectors: npt.NDArray[np.float64]
    # The context manager closes the underlying archive file once the
    # array has been read into memory.
    with np.load(filename_npz) as loaded:
        vectors = loaded['chroma_vectors']

    # Normalize to [0, 1]. A non-positive (or NaN) peak would make the
    # division meaningless, so such input is rendered as an all-black image.
    peak = vectors.max()
    normalized: npt.NDArray[Any]
    if peak > 0:
        normalized = vectors / peak
    else:
        normalized = np.zeros_like(vectors)

    cvs_uint: npt.NDArray[np.uint8] = \
        (normalized ** gamma * 255).astype(np.uint8)

    # Rotate counterclockwise so the first array axis runs along the
    # image's horizontal direction.
    pil_img = Image.fromarray(np.rot90(cvs_uint))
    x, y = pil_img.size
    # NEAREST keeps hard pixel edges when enlarging by integer factors.
    pil_img.resize(size=(x * hscale, y * vscale),
                   resample=Image.NEAREST).save(filename_png)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment