Skip to content

Instantly share code, notes, and snippets.

@tam17aki
Last active July 3, 2020 17:06
Show Gist options
  • Save tam17aki/14b54e4014b2ce2cfbe42ec5f3de4904 to your computer and use it in GitHub Desktop.
Save tam17aki/14b54e4014b2ce2cfbe42ec5f3de4904 to your computer and use it in GitHub Desktop.
import pyworld as pw
import pysptk
from scipy.io import wavfile
import numpy as np
# Analysis/resynthesis demo with the WORLD vocoder: extract features from an
# example utterance, compress them, and write three WAV files for comparison
# (original, resynthesis from raw features, resynthesis from coded features).
fs, x = wavfile.read(pysptk.util.example_audio_file())
# Sanity check: the aperiodicity coding note below assumes this sampling rate.
assert fs == 16000
wavfile.write('./orig.wav', fs, x)

# Convert from short to float
x = x.astype(np.float64)

# Feature extraction (fundamental frequency, spectral envelope, aperiodicity)
f0, sp, ap = pw.wav2world(x, fs)
# FFT size used by CheapTrick for this sampling rate; needed again when
# decoding the compressed features back to full resolution.
fft_size = pw.get_cheaptrick_fft_size(fs)

# Dimensionality reduction of the features -> DCT-based method
# https://www.isca-speech.org/archive/Interspeech_2017/abstracts/0067.html
sp_dim = 50  # reducing to 50 dimensions leaves the sound quality unchanged
code_sp = pw.code_spectral_envelope(sp, fs, sp_dim)
code_ap = pw.code_aperiodicity(ap, fs)
# The `dim` of the coded aperiodicity is defined based on `fs` as follows:
# fs = `16000` : `1`
# fs = `22050` : `2`
# fs = `44100` : `5`
# fs = `48000` : `5`
decode_sp = pw.decode_spectral_envelope(code_sp, fs, fft_size)
decode_ap = pw.decode_aperiodicity(code_ap, fs, fft_size)

# The synthesized waveform is floating point and is not guaranteed to stay
# inside the int16 range. Casting an out-of-range value to int16 does not
# saturate; it produces a corrupted sample (audible click), so clip first.
int16_range = np.iinfo(np.int16)

# Resynthesis from the original (uncompressed) features
y = pw.synthesize(f0, sp, ap, fs)
y = np.clip(y, int16_range.min, int16_range.max).astype(np.int16)
outfile = 'world_resynthesis.wav'
wavfile.write(outfile, fs, y)

# Resynthesis from the coded-then-decoded features
y = pw.synthesize(f0, decode_sp, decode_ap, fs)
y = np.clip(y, int16_range.min, int16_range.max).astype(np.int16)
outfile = 'world_resynthesis_coded.wav'
wavfile.write(outfile, fs, y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment