tam17aki/real_time_vc2.py

## real_time_vc2.py
#!/usr/bin/python3
# -*- coding: utf-8 -*-

# Copyright (c) 2020 Akira TAMAMORI
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from PyQt5.QtWidgets import (QWidget, QSlider,
                             QLabel, QApplication)
from PyQt5.QtCore import Qt, QTimer
import sys
import pyaudio
import numpy as np
import pyworld as pw

# https://qiita.com/Nobu12/items/6248c509401b0e666a55

# Audio configuration used by the voice changer below.
sample_rate = 16000  # sampling frequency in Hz
input_buffer_size = 8 * 1024  # samples read from the input per chunk
output_buffer_size = 2 * 1024  # frames per buffer of the output stream
# Pitch adjustment: 2.0 raises the voice one octave, 0.5 lowers it one octave.
f0_rate = 1.0
# Timbre adjustment (> 0.0): below 1.0 for a more feminine voice,
# above 1.0 for a more masculine one.
sp_rate = 1.0


class Analysis_Synthesis(QWidget):
    """Voice-changer window: WORLD analysis, modification and resynthesis.

    A QTimer repeatedly calls :meth:`update`, which reads one chunk from the
    input stream, scales its F0 by ``f0_rate``, warps its spectral envelope
    by ``sp_rate`` and writes the resynthesized chunk to the output stream.
    Both factors are controlled by the two sliders built in :meth:`initUI`.
    """

    def __init__(self):
        """Open the audio streams, build the UI and start the timer."""
        super().__init__()

        self.CHUNK = input_buffer_size  # samples read per update() call
        self.RATE = sample_rate  # sampling frequency in Hz
        self._closed = False  # True once the audio resources are released

        # Conversion factors. They are set before the timer is started so
        # that update() never runs on a partially initialised object.
        self.f0_rate = f0_rate
        self.sp_rate = sp_rate

        # Storage for audio data (plot data); not used by this class itself.
        self.data = np.zeros(self.CHUNK)

        self.audio = pyaudio.PyAudio()
        # Capture-only stream. The original also requested output=True here,
        # but nothing is ever written to this stream; playback goes through
        # stream_out.
        self.stream_in = self.audio.open(format=pyaudio.paInt16,
                                         channels=1,
                                         rate=self.RATE,
                                         input=True,
                                         frames_per_buffer=self.CHUNK)

        self.stream_out = self.audio.open(format=pyaudio.paInt16,
                                          channels=1,
                                          rate=self.RATE,
                                          output=True,
                                          frames_per_buffer=output_buffer_size)

        self.timer = QTimer()
        self.timer.timeout.connect(self.update)

        self.initUI()

        # Start polling only when everything update() touches exists.
        self.timer.start(5)

    def analysis_resynthesis(self, signal):
        """Analyse ``signal``, apply pitch/formant shifts and resynthesize.

        Args:
            signal: one-dimensional float64 array of samples (update()
                passes int16 samples converted to float64).

        Returns:
            bytes: the converted audio as raw int16 samples.
        """
        # The analysis uses the same rate the streams were opened with.
        f0, t = pw.dio(signal, self.RATE)  # fundamental frequency extraction
        f0 = pw.stonemask(signal, f0, t, self.RATE)  # F0 refinement
        sp = pw.cheaptrick(signal, f0, t, self.RATE)  # spectral envelope
        ap = pw.d4c(signal, f0, t, self.RATE)  # aperiodicity

        # Pitch shift.
        modified_f0 = self.f0_rate * f0

        # Formant shift (uniform warping of the frequency axis). For each
        # output bin compute the input bin it is copied from; this is the
        # same mapping the former per-bin Python loop used.
        bins = np.arange(sp.shape[1])
        if self.sp_rate >= 1.0:
            source_bins = (bins / self.sp_rate).astype(np.intp)
        else:
            sp_range = int(sp.shape[1] * self.sp_rate)
            # Bins at or above sp_range are copied unchanged.
            source_bins = np.where(bins < sp_range,
                                   (self.sp_rate * bins).astype(np.intp),
                                   bins)
        # Index-array selection does not guarantee a C-contiguous result,
        # so force it before handing the array to pyworld.
        modified_sp = np.ascontiguousarray(sp[:, source_bins])

        synth = pw.synthesize(modified_f0, modified_sp, ap, self.RATE)

        # astype(np.int16) does not saturate: samples outside the int16
        # range would wrap around and produce loud clicks, so clip first.
        limits = np.iinfo(np.int16)
        clipped = np.clip(synth, limits.min, limits.max)
        return clipped.astype(np.int16).tobytes()

    def update(self):
        """Timer slot: read one chunk, convert it and play the result.

        NOTE(review): this name shadows QWidget.update(); it is kept
        because the timer and possibly external callers rely on it.
        """
        if self._closed:
            # The streams are gone; a late timer tick must not touch them.
            return
        try:
            audio_input = self.stream_in.read(
                self.CHUNK, exception_on_overflow=False)
            signal = np.frombuffer(
                audio_input, dtype='int16').astype(np.float64)
            output = self.analysis_resynthesis(signal)
            self.stream_out.write(output)

        except KeyboardInterrupt:
            print("\nInterrupt.")
            self._shutdown()
            print("Stop Streaming.")

    def _shutdown(self):
        """Stop the timer and release the audio resources (idempotent)."""
        if self._closed:
            return
        self._closed = True
        self.timer.stop()
        for stream in (self.stream_in, self.stream_out):
            stream.stop_stream()
            stream.close()
        self.audio.terminate()

    def closeEvent(self, event):
        """Release the audio devices when the window is closed."""
        self._shutdown()
        super().closeEvent(event)

    @staticmethod
    def _label_text(title, rate):
        """Return the label text: ``title``, a newline, ``rate`` in percent.

        round() is used instead of int(): a float product such as
        ``rate * 100`` can land just below the intended integer, and
        truncation would then display a value one percent too low.
        """
        return title + '\n' + str(round(rate * 100)) + '%'

    def initUI(self):
        """Create the two sliders and their labels, then show the window."""
        # Vertical slider for the pitch factor.
        sld_f0 = QSlider(Qt.Vertical, self)
        # Keep the slider from taking keyboard focus.
        sld_f0.setFocusPolicy(Qt.NoFocus)
        sld_f0.setGeometry(30, 40, 30, 200)
        # changeValue_f0 is called whenever the slider moves.
        sld_f0.valueChanged[int].connect(self.changeValue_f0)

        # Vertical slider for the formant factor.
        sld_sp = QSlider(Qt.Vertical, self)
        # Keep the slider from taking keyboard focus.
        sld_sp.setFocusPolicy(Qt.NoFocus)
        sld_sp.setGeometry(130, 40, 30, 200)
        # changeValue_sp is called whenever the slider moves.
        sld_sp.valueChanged[int].connect(self.changeValue_sp)

        # Label showing the pitch factor (x, y, width, height).
        self.label_f0 = QLabel(self)
        self.label_f0.setGeometry(30, 0, 100, 40)
        self.label_f0.setText(self._label_text('Pitch', self.f0_rate))

        # Label showing the formant factor (x, y, width, height).
        self.label_sp = QLabel(self)
        self.label_sp.setGeometry(120, 0, 100, 40)
        self.label_sp.setText(self._label_text('Formant', self.sp_rate))

        # Initial slider positions; with the mappings in changeValue_f0 and
        # changeValue_sp both correspond to a factor of 1.0 (100%). The
        # labels already exist, so the valueChanged slots can update them.
        sld_f0.setValue(25)
        sld_sp.setValue(20)

        # Window geometry and title.
        self.setGeometry(300, 300, 200, 250)
        self.setWindowTitle('ボイスチェンジャ')
        self.show()

    def changeValue_f0(self, value):
        """Slot: map the slider ``value`` to ``f0_rate`` and update the label.

        The mapping is ``(50 + 2 * value) / 100``, i.e. 0 -> 0.5, 25 -> 1.0.
        """
        self.f0_rate = (50 + 2.0 * value) / 100
        self.label_f0.setText(self._label_text('Pitch', self.f0_rate))

    def changeValue_sp(self, value):
        """Slot: map the slider ``value`` to ``sp_rate`` and update the label.

        The mapping is ``(75 + 1.25 * value) / 100``, i.e. 0 -> 0.75,
        20 -> 1.0.
        """
        self.sp_rate = (75 + 1.25 * value) / 100
        self.label_sp.setText(self._label_text('Formant', self.sp_rate))


if __name__ == '__main__':
    # Script entry point: create the Qt application and the voice-changer
    # window, run the event loop and exit with its return code.
    app = QApplication(sys.argv)
    ex = Analysis_Synthesis()  # the constructor already shows the window
    exit_code = app.exec_()
    sys.exit(exit_code)
	#!/usr/bin/python3
	# -*- coding: utf-8 -*-

	# Copyright (c) 2020 Akira TAMAMORI
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.

	from PyQt5.QtWidgets import (QWidget, QSlider,
	QLabel, QApplication)
	from PyQt5.QtCore import Qt, QTimer
	import sys
	import pyaudio
	import numpy as np
	import pyworld as pw

	# https://qiita.com/Nobu12/items/6248c509401b0e666a55

	# Audio configuration used by the voice changer below.
	sample_rate = 16000  # sampling frequency in Hz
	input_buffer_size = 8 * 1024  # samples read from the input per chunk
	output_buffer_size = 2 * 1024  # frames per buffer of the output stream
	# Pitch adjustment: 2.0 raises the voice one octave, 0.5 lowers it one octave.
	f0_rate = 1.0
	# Timbre adjustment (> 0.0): below 1.0 for a more feminine voice,
	# above 1.0 for a more masculine one.
	sp_rate = 1.0


	class Analysis_Synthesis(QWidget):
	    """Voice-changer window: WORLD analysis, modification and resynthesis.

	    A QTimer repeatedly calls :meth:`update`, which reads one chunk from the
	    input stream, scales its F0 by ``f0_rate``, warps its spectral envelope
	    by ``sp_rate`` and writes the resynthesized chunk to the output stream.
	    Both factors are controlled by the two sliders built in :meth:`initUI`.
	    """

	    def __init__(self):
	        """Open the audio streams, build the UI and start the timer."""
	        super().__init__()

	        self.CHUNK = input_buffer_size  # samples read per update() call
	        self.RATE = sample_rate  # sampling frequency in Hz
	        self._closed = False  # True once the audio resources are released

	        # Conversion factors. They are set before the timer is started so
	        # that update() never runs on a partially initialised object.
	        self.f0_rate = f0_rate
	        self.sp_rate = sp_rate

	        # Storage for audio data (plot data); not used by this class itself.
	        self.data = np.zeros(self.CHUNK)

	        self.audio = pyaudio.PyAudio()
	        # Capture-only stream. The original also requested output=True here,
	        # but nothing is ever written to this stream; playback goes through
	        # stream_out.
	        self.stream_in = self.audio.open(format=pyaudio.paInt16,
	                                         channels=1,
	                                         rate=self.RATE,
	                                         input=True,
	                                         frames_per_buffer=self.CHUNK)

	        self.stream_out = self.audio.open(format=pyaudio.paInt16,
	                                          channels=1,
	                                          rate=self.RATE,
	                                          output=True,
	                                          frames_per_buffer=output_buffer_size)

	        self.timer = QTimer()
	        self.timer.timeout.connect(self.update)

	        self.initUI()

	        # Start polling only when everything update() touches exists.
	        self.timer.start(5)

	    def analysis_resynthesis(self, signal):
	        """Analyse ``signal``, apply pitch/formant shifts and resynthesize.

	        Args:
	            signal: one-dimensional float64 array of samples (update()
	                passes int16 samples converted to float64).

	        Returns:
	            bytes: the converted audio as raw int16 samples.
	        """
	        # The analysis uses the same rate the streams were opened with.
	        f0, t = pw.dio(signal, self.RATE)  # fundamental frequency extraction
	        f0 = pw.stonemask(signal, f0, t, self.RATE)  # F0 refinement
	        sp = pw.cheaptrick(signal, f0, t, self.RATE)  # spectral envelope
	        ap = pw.d4c(signal, f0, t, self.RATE)  # aperiodicity

	        # Pitch shift.
	        modified_f0 = self.f0_rate * f0

	        # Formant shift (uniform warping of the frequency axis). For each
	        # output bin compute the input bin it is copied from; this is the
	        # same mapping the former per-bin Python loop used.
	        bins = np.arange(sp.shape[1])
	        if self.sp_rate >= 1.0:
	            source_bins = (bins / self.sp_rate).astype(np.intp)
	        else:
	            sp_range = int(sp.shape[1] * self.sp_rate)
	            # Bins at or above sp_range are copied unchanged.
	            source_bins = np.where(bins < sp_range,
	                                   (self.sp_rate * bins).astype(np.intp),
	                                   bins)
	        # Index-array selection does not guarantee a C-contiguous result,
	        # so force it before handing the array to pyworld.
	        modified_sp = np.ascontiguousarray(sp[:, source_bins])

	        synth = pw.synthesize(modified_f0, modified_sp, ap, self.RATE)

	        # astype(np.int16) does not saturate: samples outside the int16
	        # range would wrap around and produce loud clicks, so clip first.
	        limits = np.iinfo(np.int16)
	        clipped = np.clip(synth, limits.min, limits.max)
	        return clipped.astype(np.int16).tobytes()

	    def update(self):
	        """Timer slot: read one chunk, convert it and play the result.

	        NOTE(review): this name shadows QWidget.update(); it is kept
	        because the timer and possibly external callers rely on it.
	        """
	        if self._closed:
	            # The streams are gone; a late timer tick must not touch them.
	            return
	        try:
	            audio_input = self.stream_in.read(
	                self.CHUNK, exception_on_overflow=False)
	            signal = np.frombuffer(
	                audio_input, dtype='int16').astype(np.float64)
	            output = self.analysis_resynthesis(signal)
	            self.stream_out.write(output)

	        except KeyboardInterrupt:
	            print("\nInterrupt.")
	            self._shutdown()
	            print("Stop Streaming.")

	    def _shutdown(self):
	        """Stop the timer and release the audio resources (idempotent)."""
	        if self._closed:
	            return
	        self._closed = True
	        self.timer.stop()
	        for stream in (self.stream_in, self.stream_out):
	            stream.stop_stream()
	            stream.close()
	        self.audio.terminate()

	    def closeEvent(self, event):
	        """Release the audio devices when the window is closed."""
	        self._shutdown()
	        super().closeEvent(event)

	    @staticmethod
	    def _label_text(title, rate):
	        """Return the label text: ``title``, a newline, ``rate`` in percent.

	        round() is used instead of int(): a float product such as
	        ``rate * 100`` can land just below the intended integer, and
	        truncation would then display a value one percent too low.
	        """
	        return title + '\n' + str(round(rate * 100)) + '%'

	    def initUI(self):
	        """Create the two sliders and their labels, then show the window."""
	        # Vertical slider for the pitch factor.
	        sld_f0 = QSlider(Qt.Vertical, self)
	        # Keep the slider from taking keyboard focus.
	        sld_f0.setFocusPolicy(Qt.NoFocus)
	        sld_f0.setGeometry(30, 40, 30, 200)
	        # changeValue_f0 is called whenever the slider moves.
	        sld_f0.valueChanged[int].connect(self.changeValue_f0)

	        # Vertical slider for the formant factor.
	        sld_sp = QSlider(Qt.Vertical, self)
	        # Keep the slider from taking keyboard focus.
	        sld_sp.setFocusPolicy(Qt.NoFocus)
	        sld_sp.setGeometry(130, 40, 30, 200)
	        # changeValue_sp is called whenever the slider moves.
	        sld_sp.valueChanged[int].connect(self.changeValue_sp)

	        # Label showing the pitch factor (x, y, width, height).
	        self.label_f0 = QLabel(self)
	        self.label_f0.setGeometry(30, 0, 100, 40)
	        self.label_f0.setText(self._label_text('Pitch', self.f0_rate))

	        # Label showing the formant factor (x, y, width, height).
	        self.label_sp = QLabel(self)
	        self.label_sp.setGeometry(120, 0, 100, 40)
	        self.label_sp.setText(self._label_text('Formant', self.sp_rate))

	        # Initial slider positions; with the mappings in changeValue_f0 and
	        # changeValue_sp both correspond to a factor of 1.0 (100%). The
	        # labels already exist, so the valueChanged slots can update them.
	        sld_f0.setValue(25)
	        sld_sp.setValue(20)

	        # Window geometry and title.
	        self.setGeometry(300, 300, 200, 250)
	        self.setWindowTitle('ボイスチェンジャ')
	        self.show()

	    def changeValue_f0(self, value):
	        """Slot: map the slider ``value`` to ``f0_rate`` and update the label.

	        The mapping is ``(50 + 2 * value) / 100``, i.e. 0 -> 0.5, 25 -> 1.0.
	        """
	        self.f0_rate = (50 + 2.0 * value) / 100
	        self.label_f0.setText(self._label_text('Pitch', self.f0_rate))

	    def changeValue_sp(self, value):
	        """Slot: map the slider ``value`` to ``sp_rate`` and update the label.

	        The mapping is ``(75 + 1.25 * value) / 100``, i.e. 0 -> 0.75,
	        20 -> 1.0.
	        """
	        self.sp_rate = (75 + 1.25 * value) / 100
	        self.label_sp.setText(self._label_text('Formant', self.sp_rate))


	if __name__ == '__main__':
	    # Script entry point: create the Qt application and the voice-changer
	    # window, run the event loop and exit with its return code.
	    app = QApplication(sys.argv)
	    ex = Analysis_Synthesis()  # the constructor already shows the window
	    exit_code = app.exec_()
	    sys.exit(exit_code)