tam17aki/real_time_vc.keylog.py

## real_time_vc.keylog.py
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

# Copyright (c) 2020 Akira TAMAMORI
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from pynput import keyboard
import numpy as np
import pyaudio
import pyworld as pw
import threading


# Audio sampling rate handed to both the audio stream and the vocoder.
sample_rate = 16000

# Analysis window length in samples: a longer window makes the resynthesized
# audio more stable, but it also increases the latency.
frame_length = 8 * 1024

# Pitch scaling factor: 2.0 moves the voice one octave up, 0.5 one octave down.
f0_rate = 2.1

# Timbre scaling factor (> 0.0): use a value below 1.0 for a female voice and,
# conversely, a value above 1.0 for a male voice.
sp_rate = 0.725

# Noise gate: input whose absolute amplitude is at or below this value is not
# analyzed/resynthesized.
gate_level = 400


def analysis_resynthesis(signal):
    """Analyze one block of audio with pyworld, modify it and resynthesize it.

    The block is decomposed into F0, spectral envelope and aperiodicity. The
    F0 contour is multiplied by the module-level ``f0_rate``, the frequency
    axis of the spectral envelope is remapped according to the module-level
    ``sp_rate``, and the modified parameters are synthesized again.

    Args:
        signal: NumPy array of input samples. The caller in this file passes
            16-bit PCM converted to float64.

    Returns:
        bytes: The resynthesized waveform as raw int16 samples. Values outside
        the int16 range are clipped instead of wrapping around.
    """
    f0, t = pw.harvest(signal, sample_rate)  # fundamental frequency
    sp = pw.cheaptrick(signal, f0, t, sample_rate)  # spectral envelope
    ap = pw.d4c(signal, f0, t, sample_rate)  # aperiodicity

    # Pitch shift.
    modified_f0 = f0_rate * f0

    # Formant shift (uniform stretch of the frequency axis): every output bin
    # below ``sp_range`` reads from a rescaled source bin, all remaining bins
    # keep their original content.
    n_bins = sp.shape[1]
    bins = np.arange(n_bins)
    sp_range = int(n_bins * sp_rate)
    if sp_rate >= 1.0:
        source = (bins / sp_rate).astype(np.intp)
    else:
        source = (sp_rate * bins).astype(np.intp)
    source = np.where(bins < sp_range, source, bins)
    # Index-array selection does not guarantee C-ordered memory, so make the
    # result explicitly contiguous before handing it to the synthesizer.
    modified_sp = np.ascontiguousarray(sp[:, source])

    synth = pw.synthesize(modified_f0, modified_sp, ap, sample_rate)

    # Saturate to the int16 range first: a bare astype() would wrap
    # out-of-range samples around and produce loud clicks.
    pcm = np.iinfo(np.int16)
    return np.clip(synth, pcm.min, pcm.max).astype(np.int16).tobytes()


QuitFlag = False  # Flag telling whether the q or Esc key has been pressed


# Callback invoked when a key is pressed.
def on_press(key):
    """Handle a key press: tune the conversion parameters or request shutdown.

    Key bindings:
        u / d: raise / lower ``f0_rate`` by 0.1
        s / w: raise / lower ``sp_rate`` by 0.01
        g / h: raise / lower ``gate_level`` by 50
        q / Esc: set ``QuitFlag`` and stop the keyboard listener

    The parameters are kept at or above small lower bounds so that repeated
    presses cannot drive the scaling factors to zero or below.

    Args:
        key: Key object handed over by the keyboard listener. Objects without
            a ``char`` attribute (special keys) are compared as they are.
    """
    global QuitFlag
    global f0_rate
    global sp_rate
    global gate_level

    # Lower bounds for the adjustable parameters.
    min_f0_rate = 0.1
    min_sp_rate = 0.01
    min_gate_level = 0

    # Special keys carry no ``char`` attribute; fall back to the key itself.
    char = getattr(key, "char", key)

    if char == 'u':
        f0_rate = f0_rate + 0.1
    elif char == 'd':
        f0_rate = max(min_f0_rate, f0_rate - 0.1)
    elif char == 's':
        sp_rate = sp_rate + 0.01
    elif char == 'w':
        sp_rate = max(min_sp_rate, sp_rate - 0.01)
    elif char == 'g':
        gate_level = gate_level + 50
    elif char == 'h':
        gate_level = max(min_gate_level, gate_level - 50)
    elif char == keyboard.Key.esc or char == 'q':
        QuitFlag = True
        listener.stop()
        # The starter thread only calls listener.start(), so this join is
        # not waiting on the listener itself.
        thread.join()


# Callback invoked when a pressed key is released.
def on_release(key):
    """Ignore key-release events; nothing has to happen here."""
    return None


# Function used for keyboard monitoring (target of the monitoring thread).
def KeyLogger():
    """Start the module-level keyboard listener."""
    listener.start()


# Keyboard monitor (listener) wired to the press/release callbacks.
listener = keyboard.Listener(
    on_press=on_press,
    on_release=on_release,
)

# Thread used for keyboard monitoring.
thread = threading.Thread(target=KeyLogger)


if __name__ == "__main__":

    audio = pyaudio.PyAudio()

    # One mono 16-bit stream opened for both input and output.
    stream = audio.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=sample_rate,
                        frames_per_buffer=frame_length,
                        input=True,
                        output=True)

    # Start monitoring the keyboard.
    thread.start()

    try:
        # QuitFlag is set by on_press when q or Esc is pressed.
        while not QuitFlag and stream.is_active():
            captured = stream.read(frame_length, exception_on_overflow=False)
            signal = np.frombuffer(captured, dtype='int16').astype(np.float64)

            # Noise gate on the absolute amplitude, so that blocks whose
            # peaks are negative are not skipped by mistake.
            if np.max(np.abs(signal)) > gate_level:
                stream.write(analysis_resynthesis(signal))

    except KeyboardInterrupt:
        print("\nInterrupt.")

    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()
        # Wait for the starter thread first so listener.start() has run, then
        # stop the listener in case the loop ended without a quit key.
        thread.join()
        listener.stop()

        print("Stop Streaming.")
	#!/usr/bin/env python3
	# -- coding:utf-8 --

	# Copyright (c) 2020 Akira TAMAMORI
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.

	from pynput import keyboard
	import numpy as np
	import pyaudio
	import pyworld as pw
	import threading


	# Audio sampling rate handed to both the audio stream and the vocoder.
	sample_rate = 16000

	# Analysis window length in samples: a longer window makes the resynthesized
	# audio more stable, but it also increases the latency.
	frame_length = 8 * 1024

	# Pitch scaling factor: 2.0 moves the voice one octave up, 0.5 one octave down.
	f0_rate = 2.1

	# Timbre scaling factor (> 0.0): use a value below 1.0 for a female voice and,
	# conversely, a value above 1.0 for a male voice.
	sp_rate = 0.725

	# Noise gate: input whose absolute amplitude is at or below this value is not
	# analyzed/resynthesized.
	gate_level = 400


	def analysis_resynthesis(signal):
	    """Analyze one block of audio with pyworld, modify it and resynthesize it.

	    The block is decomposed into F0, spectral envelope and aperiodicity. The
	    F0 contour is multiplied by the module-level ``f0_rate``, the frequency
	    axis of the spectral envelope is remapped according to the module-level
	    ``sp_rate``, and the modified parameters are synthesized again.

	    Args:
	        signal: NumPy array of input samples. The caller in this file passes
	            16-bit PCM converted to float64.

	    Returns:
	        bytes: The resynthesized waveform as raw int16 samples. Values outside
	        the int16 range are clipped instead of wrapping around.
	    """
	    f0, t = pw.harvest(signal, sample_rate)  # fundamental frequency
	    sp = pw.cheaptrick(signal, f0, t, sample_rate)  # spectral envelope
	    ap = pw.d4c(signal, f0, t, sample_rate)  # aperiodicity

	    # Pitch shift.
	    modified_f0 = f0_rate * f0

	    # Formant shift (uniform stretch of the frequency axis): every output bin
	    # below ``sp_range`` reads from a rescaled source bin, all remaining bins
	    # keep their original content.
	    n_bins = sp.shape[1]
	    bins = np.arange(n_bins)
	    sp_range = int(n_bins * sp_rate)
	    if sp_rate >= 1.0:
	        source = (bins / sp_rate).astype(np.intp)
	    else:
	        source = (sp_rate * bins).astype(np.intp)
	    source = np.where(bins < sp_range, source, bins)
	    # Index-array selection does not guarantee C-ordered memory, so make the
	    # result explicitly contiguous before handing it to the synthesizer.
	    modified_sp = np.ascontiguousarray(sp[:, source])

	    synth = pw.synthesize(modified_f0, modified_sp, ap, sample_rate)

	    # Saturate to the int16 range first: a bare astype() would wrap
	    # out-of-range samples around and produce loud clicks.
	    pcm = np.iinfo(np.int16)
	    return np.clip(synth, pcm.min, pcm.max).astype(np.int16).tobytes()


	QuitFlag = False # Flag telling whether the q or Esc key has been pressed


	# Callback invoked when a key is pressed.
	def on_press(key):
	    """Handle a key press: tune the conversion parameters or request shutdown.

	    Key bindings:
	        u / d: raise / lower ``f0_rate`` by 0.1
	        s / w: raise / lower ``sp_rate`` by 0.01
	        g / h: raise / lower ``gate_level`` by 50
	        q / Esc: set ``QuitFlag`` and stop the keyboard listener

	    The parameters are kept at or above small lower bounds so that repeated
	    presses cannot drive the scaling factors to zero or below.

	    Args:
	        key: Key object handed over by the keyboard listener. Objects without
	            a ``char`` attribute (special keys) are compared as they are.
	    """
	    global QuitFlag
	    global f0_rate
	    global sp_rate
	    global gate_level

	    # Lower bounds for the adjustable parameters.
	    min_f0_rate = 0.1
	    min_sp_rate = 0.01
	    min_gate_level = 0

	    # Special keys carry no ``char`` attribute; fall back to the key itself.
	    char = getattr(key, "char", key)

	    if char == 'u':
	        f0_rate = f0_rate + 0.1
	    elif char == 'd':
	        f0_rate = max(min_f0_rate, f0_rate - 0.1)
	    elif char == 's':
	        sp_rate = sp_rate + 0.01
	    elif char == 'w':
	        sp_rate = max(min_sp_rate, sp_rate - 0.01)
	    elif char == 'g':
	        gate_level = gate_level + 50
	    elif char == 'h':
	        gate_level = max(min_gate_level, gate_level - 50)
	    elif char == keyboard.Key.esc or char == 'q':
	        QuitFlag = True
	        listener.stop()
	        # The starter thread only calls listener.start(), so this join is
	        # not waiting on the listener itself.
	        thread.join()


	# Callback invoked when a pressed key is released.
	def on_release(key):
	    """Ignore key-release events; nothing has to happen here.

	    Args:
	        key: Key object handed over by the keyboard listener (unused).

	    Returns:
	        None
	    """
	    return None


	# Function used for keyboard monitoring (target of the monitoring thread).
	def KeyLogger():
	    """Start the module-level keyboard listener."""
	    listener.start()


	# Keyboard monitor (listener) wired to the press/release callbacks.
	listener = keyboard.Listener(
	    on_press=on_press,
	    on_release=on_release,
	)

	# Thread used for keyboard monitoring.
	thread = threading.Thread(target=KeyLogger)


	if __name__ == "__main__":

	    audio = pyaudio.PyAudio()

	    # One mono 16-bit stream opened for both input and output.
	    stream = audio.open(format=pyaudio.paInt16,
	                        channels=1,
	                        rate=sample_rate,
	                        frames_per_buffer=frame_length,
	                        input=True,
	                        output=True)

	    # Start monitoring the keyboard.
	    thread.start()

	    try:
	        # QuitFlag is set by on_press when q or Esc is pressed.
	        while not QuitFlag and stream.is_active():
	            captured = stream.read(frame_length, exception_on_overflow=False)
	            signal = np.frombuffer(captured, dtype='int16').astype(np.float64)

	            # Noise gate on the absolute amplitude, so that blocks whose
	            # peaks are negative are not skipped by mistake.
	            if np.max(np.abs(signal)) > gate_level:
	                stream.write(analysis_resynthesis(signal))

	    except KeyboardInterrupt:
	        print("\nInterrupt.")

	    finally:
	        stream.stop_stream()
	        stream.close()
	        audio.terminate()
	        # Wait for the starter thread first so listener.start() has run, then
	        # stop the listener in case the loop ended without a quit key.
	        thread.join()
	        listener.stop()

	        print("Stop Streaming.")