Skip to content

Instantly share code, notes, and snippets.

@qxj
Created March 24, 2023 07:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qxj/16acb506c2fa98a8a93e096292f99a0b to your computer and use it in GitHub Desktop.
Save qxj/16acb506c2fa98a8a93e096292f99a0b to your computer and use it in GitHub Desktop.
Azure text to speech demo
#!/usr/bin/env python3
# coding: utf-8
"""
Speech synthesis samples for the Microsoft Cognitive Services Speech SDK
https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/speech_synthesis_sample.py
"""
import os
import azure.cognitiveservices.speech as speechsdk
# Set up the subscription info for the Speech Service:
# credentials are read from the environment (SPEECH_KEY / SPEECH_REGION),
# so os.environ.get returns None and the SDK raises if they are unset.
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
# Default-speaker output config.
# NOTE(review): audio_config is not referenced by speech_synthesis_to_file
# below (it builds its own file-based config) — confirm it is intentional.
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
# Voice catalogue:
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts#prebuilt-neural-voices
# 'zh-CN-YunfengNeural' is one of the prebuilt Chinese (zh-CN) neural voices.
speech_config.speech_synthesis_voice_name='zh-CN-YunfengNeural'
def speech_synthesis_to_file(text, file_name="outputaudio.wav"):
    """Synthesize *text* with Azure TTS and save the audio to *file_name*.

    Uses the module-level ``speech_config`` (subscription, region, voice).
    If *file_name* ends in ``.mp3`` (any case), the synthesis output format
    is switched to 16 kHz / 32 kbit/s mono MP3; otherwise the SDK's default
    (RIFF/WAV) format is used.

    Args:
        text: The plain text to synthesize.
        file_name: Destination audio file path (default ``outputaudio.wav``).
    """
    # Create a speech synthesizer that writes to a file instead of a speaker.
    file_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
    # Supported output formats:
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs
    # BUG FIX: os.path.splitext returns a (root, ext) tuple; the original
    # compared the whole tuple against ".mp3", so the MP3 branch never ran.
    ext = os.path.splitext(file_name)[1]
    if ext.lower() == ".mp3":
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=file_config)
    # speak_text_async returns a future; .get() blocks until synthesis ends.
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}], and the audio was saved to [{}]".format(text, file_name))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
# Sample passage to synthesize (Chinese): a short summary of the DHLCF
# dynamic-hypergraph collaborative-filtering paper (CIKM 2022).
text = '''众所周知,超图结构有助于图中节点的高阶关系建模,并且有利于建立节点的多种关系,受到众多研究者青睐。然而,与普通图网络类似,超图中静态的启发式拓扑结构与现实中动态演变的图节点关系相悖,限制了超图的学习效果。
针对上述问题,我们来看一篇阿里发表在CIKM2022上发表的文章,动态超图协同过滤。文中提出了一种可微的轻量级多层超图学习器,它可以在训练过程中在不同的层动态地学习超图结构。
论文所提模型DHLCF在Yelp, Gowalla和LastFM-2K数据集上针对NDCG@10上分别取得了14.91%、14.67%和25.67%的改进。
'''
# Destination path; a .wav extension keeps the SDK's default output format.
filename = 'tts_demo.wav'
# Run the demo: synthesize the passage and write the audio to `filename`.
speech_synthesis_to_file(text, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment