Skip to content

Instantly share code, notes, and snippets.

@qxj
Created March 24, 2023 07:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qxj/16acb506c2fa98a8a93e096292f99a0b to your computer and use it in GitHub Desktop.
Save qxj/16acb506c2fa98a8a93e096292f99a0b to your computer and use it in GitHub Desktop.
Azure text to speech demo
#!/usr/bin/env python3
# coding: utf-8
"""
Speech synthesis samples for the Microsoft Cognitive Services Speech SDK
https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/python/console/speech_synthesis_sample.py
"""
import os
import azure.cognitiveservices.speech as speechsdk
# Set up the subscription info for the Speech Service:
# credentials are read from the environment (SPEECH_KEY / SPEECH_REGION),
# so os.environ.get returns None and the SDK raises if they are unset.
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
# Default-speaker output config.
# NOTE(review): audio_config is not referenced by speech_synthesis_to_file
# below (it builds its own file-based config) — confirm it is intentional.
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
# Voice catalogue:
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts#prebuilt-neural-voices
# 'zh-CN-YunfengNeural' is one of the prebuilt Chinese (zh-CN) neural voices.
speech_config.speech_synthesis_voice_name='zh-CN-YunfengNeural'
def speech_synthesis_to_file(text, file_name="outputaudio.wav"):
    """Synthesize *text* with Azure TTS and save the audio to *file_name*.

    Uses the module-level ``speech_config`` (subscription, region, voice).
    If *file_name* ends in ``.mp3`` (any case), the synthesis output format
    is switched to 16 kHz / 32 kbit/s mono MP3; otherwise the SDK's default
    (RIFF/WAV) format is used.

    Args:
        text: The plain text to synthesize.
        file_name: Destination audio file path (default ``outputaudio.wav``).
    """
    # Create a speech synthesizer that writes to a file instead of a speaker.
    file_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
    # Supported output formats:
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs
    # BUG FIX: os.path.splitext returns a (root, ext) tuple; the original
    # compared the whole tuple against ".mp3", so the MP3 branch never ran.
    ext = os.path.splitext(file_name)[1]
    if ext.lower() == ".mp3":
        speech_config.set_speech_synthesis_output_format(
            speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=file_config)
    # speak_text_async returns a future; .get() blocks until synthesis ends.
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}], and the audio was saved to [{}]".format(text, file_name))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
# Sample passage to synthesize (Chinese): a short summary of the DHLCF
# dynamic-hypergraph collaborative-filtering paper (CIKM 2022).
text = '''众所周知,超图结构有助于图中节点的高阶关系建模,并且有利于建立节点的多种关系,受到众多研究者青睐。然而,与普通图网络类似,超图中静态的启发式拓扑结构与现实中动态演变的图节点关系相悖,限制了超图的学习效果。
针对上述问题,我们来看一篇阿里发表在CIKM2022上发表的文章,动态超图协同过滤。文中提出了一种可微的轻量级多层超图学习器,它可以在训练过程中在不同的层动态地学习超图结构。
论文所提模型DHLCF在Yelp, Gowalla和LastFM-2K数据集上针对NDCG@10上分别取得了14.91%、14.67%和25.67%的改进。
'''
# Destination path; a .wav extension keeps the SDK's default output format.
filename = 'tts_demo.wav'
# Run the demo: synthesize the passage and write the audio to `filename`.
speech_synthesis_to_file(text, filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment