Skip to content

Instantly share code, notes, and snippets.

@takana-v
Last active January 2, 2023 10:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takana-v/acd58fca72255a9c238bb297c0f28bc7 to your computer and use it in GitHub Desktop.
Save takana-v/acd58fca72255a9c238bb297c0f28bc7 to your computer and use it in GitHub Desktop.
# vv_core_wrapper.py
# Copyright (c) 2022 takana-v
#
# This software is released under the MIT License.
# http://opensource.org/licenses/mit-license.php
import faulthandler
import json
from ctypes import *
from pprint import pprint
# Debugging aid: have Python dump a traceback if a fatal error occurs
# (for example a crash inside the native library).
faulthandler.enable()

# Placeholder locations of the core shared library and of the Open JTalk
# dictionary directory.
core_path = r"/path/to/voicevox_core.dll"
dict_path = r"/path/to/open_jtalk_dic_utf_8-1.11"

# Handle to the core library; every ``core.voicevox_*`` call goes through it.
core = CDLL(core_path)
class VoicevoxInitializeOptions(Structure):
    """ctypes mirror of the options struct taken by ``voicevox_initialize``.

    ``voicevox_make_default_initialize_options`` is declared to return an
    instance of this type.
    """

    # The order of the entries defines the binary layout of the struct.
    _fields_ = [
        ("acceleration_mode", c_int32),     # 32-bit signed integer
        ("cpu_num_threads", c_uint16),      # 16-bit unsigned integer
        ("load_all_models", c_bool),        # boolean flag
        ("open_jtalk_dict_dir", c_char_p),  # NUL-terminated byte string
    ]
class VoicevoxAudioQueryOptions(Structure):
    """ctypes mirror of the options struct taken by ``voicevox_audio_query``.

    ``voicevox_make_default_audio_query_options`` is declared to return an
    instance of this type.
    """

    # Single boolean member.
    _fields_ = [("kana", c_bool)]
class VoicevoxSynthesisOptions(Structure):
    """ctypes mirror of the options struct taken by ``voicevox_synthesis``.

    ``voicevox_make_default_synthesis_options`` is declared to return an
    instance of this type.
    """

    # Single boolean member.
    _fields_ = [("enable_interrogative_upspeak", c_bool)]
class VoicevoxTtsOptions(Structure):
    """ctypes mirror of the options struct taken by ``voicevox_tts``.

    ``voicevox_make_default_tts_options`` is declared to return an instance
    of this type.
    """

    # Two consecutive boolean members; their order defines the layout.
    _fields_ = [
        ("kana", c_bool),
        ("enable_interrogative_upspeak", c_bool),
    ]
# Foreign-function signatures for every core entry point used by this module.
# Each entry maps the exported symbol name to ``(argtypes, restype)``; the
# loop below applies them to the corresponding attribute of ``core``.
#
# NOTE(review): the WAV length out-parameter is declared as a pointer to a
# one-element ``c_uint`` array. Presumably this should match the width of the
# integer type used in the C header -- TODO confirm against voicevox_core.h.
_WAV_OUTPUT_ARGTYPES = (POINTER(c_uint * 1), POINTER(POINTER(c_uint8)))

_CORE_SIGNATURES = {
    "voicevox_audio_query": (
        (c_char_p, c_uint32, VoicevoxAudioQueryOptions, POINTER(c_char_p)),
        c_int32,
    ),
    "voicevox_audio_query_json_free": ((c_char_p,), None),
    "voicevox_error_result_to_message": ((c_int32,), c_char_p),
    "voicevox_finalize": ((), None),
    "voicevox_get_metas_json": ((), c_char_p),
    "voicevox_get_supported_devices_json": ((), c_char_p),
    "voicevox_get_version": ((), c_char_p),
    "voicevox_initialize": ((VoicevoxInitializeOptions,), c_int32),
    "voicevox_is_gpu_mode": ((), c_bool),
    "voicevox_is_model_loaded": ((c_uint32,), c_bool),
    "voicevox_load_model": ((c_uint32,), c_int32),
    "voicevox_make_default_audio_query_options": ((), VoicevoxAudioQueryOptions),
    "voicevox_make_default_initialize_options": ((), VoicevoxInitializeOptions),
    "voicevox_make_default_synthesis_options": ((), VoicevoxSynthesisOptions),
    "voicevox_make_default_tts_options": ((), VoicevoxTtsOptions),
    "voicevox_synthesis": (
        (c_char_p, c_uint32, VoicevoxSynthesisOptions) + _WAV_OUTPUT_ARGTYPES,
        c_int32,
    ),
    "voicevox_tts": (
        (c_char_p, c_uint32, VoicevoxTtsOptions) + _WAV_OUTPUT_ARGTYPES,
        c_int32,
    ),
    "voicevox_wav_free": ((POINTER(c_uint8),), None),
}

for _symbol, (_argtypes, _restype) in _CORE_SIGNATURES.items():
    _function = getattr(core, _symbol)
    _function.argtypes = _argtypes
    _function.restype = _restype
def _raise_on_error(result_code):
    """Raise ``RuntimeError`` carrying the core's message for a non-zero code.

    A result code of ``0`` is treated as success and returns ``None``.
    """
    if result_code != 0:
        raise RuntimeError(
            core.voicevox_error_result_to_message(result_code).decode("utf-8")
        )


def _take_wav(wav_length_ptr, wav_ptr):
    """Copy the WAV buffer filled in by the core into ``bytes`` and free it.

    Args:
        wav_length_ptr: pointer to the one-element length array that was
            passed to ``voicevox_tts`` / ``voicevox_synthesis``.
        wav_ptr: pointer to the ``POINTER(c_uint8)`` that was passed to the
            same call.

    Returns:
        The WAV data as an independent ``bytes`` object.
    """
    wav_length = wav_length_ptr.contents[0]
    try:
        # One bulk copy instead of appending the buffer byte by byte.
        return string_at(wav_ptr.contents, wav_length)
    finally:
        # The native buffer is released even if the copy fails.
        core.voicevox_wav_free(wav_ptr.contents)


def main():
    """Run the demo: initialize the core, synthesize two WAV files, clean up.

    Writes ``output.wav`` (direct TTS) and ``output_2.wav`` (synthesis from a
    modified AudioQuery) to the current directory.

    Raises:
        RuntimeError: if any core call reports a non-zero result code.
    """
    # Initialize the core.
    initialize_option = core.voicevox_make_default_initialize_options()
    initialize_option.open_jtalk_dict_dir = c_char_p(dict_path.encode())
    _raise_on_error(core.voicevox_initialize(initialize_option))

    try:
        # Show assorted information.
        print(f'version: {core.voicevox_get_version().decode("utf-8")}')
        print(f'is_gpu: {core.voicevox_is_gpu_mode()}')
        print('metas:')
        pprint(json.loads(core.voicevox_get_metas_json()))

        # Load the model.
        speaker_id = 8
        print(f'is_model_loaded: {core.voicevox_is_model_loaded(speaker_id)}')
        _raise_on_error(core.voicevox_load_model(speaker_id))
        print(f'is_model_loaded: {core.voicevox_is_model_loaded(speaker_id)}')

        # Try TTS.
        tts_option = core.voicevox_make_default_tts_options()
        # NOTE(review): the length buffer type mirrors the declared argtypes;
        # confirm it matches the integer width used by the C header.
        output_wav_length = pointer((c_uint * 1)())
        output_wav = pointer(pointer(c_uint8()))
        text = c_char_p("これはテストです。".encode("utf-8"))
        _raise_on_error(
            core.voicevox_tts(
                text, speaker_id, tts_option, output_wav_length, output_wav
            )
        )
        output_wav_bin = _take_wav(output_wav_length, output_wav)
        print(f'output_wav_length: {len(output_wav_bin)}')
        with open("output.wav", mode="wb") as f:
            f.write(output_wav_bin)

        # Try creating an AudioQuery.
        audio_query_option = core.voicevox_make_default_audio_query_options()
        output_audio_query_json = pointer(c_char_p())
        _raise_on_error(
            core.voicevox_audio_query(
                text, speaker_id, audio_query_option, output_audio_query_json
            )
        )
        # Copy the JSON out and free the native string before parsing, so a
        # parse error cannot leak it.
        audio_query_raw = output_audio_query_json.contents.value
        core.voicevox_audio_query_json_free(output_audio_query_json.contents)
        audio_query = json.loads(audio_query_raw)
        print('AudioQuery:')
        pprint(audio_query)

        # Tweak the AudioQuery a little.
        audio_query["pitch_scale"] = 0.1

        # Synthesize speech from the modified AudioQuery.
        synthesis_option = core.voicevox_make_default_synthesis_options()
        audio_query_json = c_char_p(json.dumps(audio_query).encode("utf-8"))
        output_wav_length = pointer((c_uint * 1)())
        output_wav = pointer(pointer(c_uint8()))
        _raise_on_error(
            core.voicevox_synthesis(
                audio_query_json,
                speaker_id,
                synthesis_option,
                output_wav_length,
                output_wav,
            )
        )
        output_wav_bin = _take_wav(output_wav_length, output_wav)
        print(f'output_wav_length: {len(output_wav_bin)}')
        with open("output_2.wav", mode="wb") as f:
            f.write(output_wav_bin)
    finally:
        # Clean up: always finalize once initialization has succeeded.
        core.voicevox_finalize()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment