Last active
May 31, 2023 19:35
-
-
Save tam17aki/3826197d71c67b5df17bb6022f1b753a to your computer and use it in GitHub Desktop.
ピッチシフト (再生時間を変えずに音の高さを変える)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" 音声情報処理 n本ノック !! """ | |
# Copyright (C) 2020 by Akira TAMAMORI | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
# Commentary: | |
# - ピッチシフト (再生時間を変えずに音の高さを変える) | |
# - タイムストレッチとリサンプリングの合わせ技 | |
from scipy.io import wavfile | |
import numpy as np | |
from scipy.signal import windows | |
IN_WAVE_FILE = "in.wav" # 16bit モノラル音声(前提) | |
OUT_WAVE_FILE = "out.wav" | |
PITCH = 1.5 # ピッチの倍率 | |
N_TERM = 30 # 標本化定理の近似項数 | |
RATE = 1.0 / PITCH # 再生速度の倍率 | |
def resampling(data_in): | |
""" リサンプリング (音を高く/低くするが再生時間も変わる)""" | |
# wavの読み込み | |
data_in = data_in.astype(np.float64) | |
n_samples = len(data_in) | |
n_samples_out = int(n_samples / PITCH) | |
data_out = np.zeros(n_samples_out) | |
for n in range(n_samples_out): | |
analog_time = PITCH * n # アナログ信号の時刻 t を求める | |
digital_time = int(analog_time) # 整数値に変換 | |
# 標本化定理に基づくアナログ信号復元によるリサンプリング | |
sum_index = np.arange(digital_time - N_TERM // 2, | |
digital_time + N_TERM // 2 + 1) | |
start_index = np.min(np.where(sum_index >= 0)) | |
end_index = np.max(np.where(sum_index <= n_samples)) | |
sinc_vector = np.sinc(analog_time - sum_index[start_index:end_index]) | |
data_out[n] = data_in[sum_index[start_index:end_index]].dot(sinc_vector) | |
return data_out | |
def calc_autocorr(wave_data, corr_size, lag): | |
""" 自己相関関数を計算する """ | |
autocorr = 0.0 | |
for i in range(corr_size): | |
autocorr += wave_data[i] * wave_data[i + lag] | |
return autocorr | |
def get_period(wave_data, period_min, period_max, corr_size): | |
""" 相関関数のピーク位置に基づいて周期を計算する """ | |
corr_max = 0.0 | |
period = period_min | |
for p in range(period_min, period_max): | |
corr = calc_autocorr(wave_data, corr_size, p) # 相関関数の計算 | |
# 相関関数のピークを求め、そのピーク位置を周期として取得する | |
if corr > corr_max: | |
corr_max = corr # 相関関数のピーク値を更新 | |
period = p # 周期を取得 | |
return period | |
def time_stretch(data_in, fs): | |
""" タイムストレッチ (音の高さを変えずに再生時間を変更する) """ | |
# wavの読み込み | |
data_in = data_in.astype(np.float64) | |
n_samples = len(data_in) | |
corr_size = int(fs * 0.01) # 相関関数のサイズ 10ms | |
min_period = int(fs * 0.005) # 窓内における音データの周期の最小値 5ms | |
max_period = int(fs * 0.02) # 窓内における音データの周期の最大値 20ms | |
offset_in = 0 # オーバーラップアドの開始位置 (入力側) | |
offset_out = 0 # オーバーラップアドの開始位置 (出力側) | |
data_out = np.zeros(int(n_samples / RATE) + 1) | |
while (offset_in + max_period * 2 < n_samples): | |
# 窓内の音データに対して(窓サイズ corr_size)、 | |
# 相関関数を利用することで音の周期を計算する | |
period = get_period(data_in[offset_in:], | |
min_period, max_period, corr_size) | |
if RATE >= 1.0: # fast | |
# オーバーラップアド(時間を縮める) | |
window = windows.hann(2 * period) | |
for n in range(period): | |
data_out[offset_out + n] = \ | |
data_in[offset_in + n] * window[period + n] | |
data_out[offset_out + n] += \ | |
data_in[offset_in + period + n] * window[n] | |
del window | |
# オーバーラップアドしていない音データをそのままコピー | |
q = int(period / (RATE - 1.0) + 0.5) | |
for n in range(period, n_samples): | |
if n >= q: | |
break | |
elif offset_in + period + n >= n_samples: | |
break | |
else: | |
data_out[offset_out + n] = data_in[offset_in + period + n] | |
offset_in += period + q | |
offset_out += q | |
else: # slow | |
# オーバーラップアドしていない音データをそのままコピー | |
data_out[offset_out: offset_out + period] = \ | |
data_in[offset_in: offset_in + period] | |
# オーバーラップアド(時間を伸ばす) | |
window = windows.hann(2 * period) | |
for n in range(period): | |
data_out[offset_out + period + n] = \ | |
data_in[offset_in + n] * window[n] | |
data_out[offset_out + period + n] += \ | |
data_in[offset_in + period + n] * window[period + n] | |
del window | |
# オーバーラップアドしていない音データをそのままコピー | |
q = int(period * RATE / (1.0 - RATE) + 0.5) | |
for n in range(period, n_samples): | |
if n >= q: | |
break | |
elif offset_in + period + n >= n_samples: | |
break | |
else: | |
data_out[offset_out + period + n] = data_in[offset_in + n] | |
offset_in += q | |
offset_out += period + q | |
return data_out | |
def pitch_shift(): | |
""" 再生時間を変えずに音の高さを変える """ | |
# wavの読み込み | |
fs, data_in = wavfile.read(IN_WAVE_FILE) | |
# リサンプリング | |
data_out = resampling(data_in) | |
# タイムストレッチ | |
data_out = time_stretch(data_out, fs) | |
# wavの書き込み | |
data_out = data_out.astype(np.int16) | |
wavfile.write(OUT_WAVE_FILE, fs, data_out) | |
if __name__ in '__main__': | |
pitch_shift() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment