Skip to content

Instantly share code, notes, and snippets.

@xjohjrdy
Last active April 24, 2024 09:27
Show Gist options
  • Star 63 You must be signed in to star a gist
  • Fork 19 You must be signed in to fork a gist
  • Save xjohjrdy/11b13c9d9f3a2d84b1f22148c429b163 to your computer and use it in GitHub Desktop.
Save xjohjrdy/11b13c9d9f3a2d84b1f22148c429b163 to your computer and use it in GitHub Desktop.
返回的音频没有文件头,所以我随便加了一个文件头,但显示的音频时长有问题。如果播放器不能正常播放,可以使用 Chrome 播放。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from ws4py.client.threadedclient import WebSocketClient
import binascii
class WSClient(WebSocketClient):
    """WebSocket client that requests speech for ``text`` and saves the audio.

    On connect it sends a ``speech.config`` message followed by an SSML
    request, then appends the payload of every ``Path:audio`` frame it
    receives to ``filename`` until a ``turn.end`` message arrives.

    No container header is written to the file: the requested
    ``outputFormat`` is an MP3 stream, so prefixing a PCM WAV header (as an
    earlier revision did) only produces a mislabelled file with a wrong
    duration.

    Args:
        url: WebSocket endpoint of the synthesis service.
        text: Text placed inside the SSML ``<prosody>`` element.
            NOTE(review): it is inserted verbatim, so ``&`` or ``<`` in the
            text will yield malformed SSML -- confirm whether callers pass
            plain text or SSML fragments before adding escaping.
        filename: Path of the output audio file (opened in binary mode).
    """

    # Header line that terminates the textual prefix of an audio frame;
    # everything after it is raw audio.
    _AUDIO_MARKER = b'Path:audio\r\n'

    def __init__(self, url, text, filename):
        # Initialise the base client first so an invalid URL cannot leave an
        # opened-but-never-closed output file behind.
        super(WSClient, self).__init__(url)
        self.text = text
        self.fp = open(filename, 'wb')

    def opened(self):
        """Send the synthesis configuration and the SSML request."""
        self.send(
            'Content-Type:application/json; charset=utf-8\r\n\r\n'
            'Path:speech.config\r\n\r\n'
            '{"context":{"synthesis":{"audio":{"metadataoptions":'
            '{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"true"},'
            '"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}\r\n'
        )
        self.send(
            "X-RequestId:fe83fbefb15c7739fe674d9f3e81d38f\r\n"
            "Content-Type:application/ssml+xml\r\n"
            "Path:ssml\r\n\r\n"
            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
            "<voice name='Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)'>"
            "<prosody pitch='+0Hz' rate ='+0%' volume='+0%'>"
            + self.text +
            "</prosody></voice></speak>\r\n"
        )

    def received_message(self, m):
        """Append audio payloads to the file; stop when the turn ends."""
        payload = m.data
        # Audio frames are checked first so that audio bytes which happen to
        # contain ``turn.end`` are not mistaken for the end-of-turn message.
        if self._AUDIO_MARKER in payload:
            if not self.fp.closed:
                # maxsplit=1: only the first marker separates header from
                # audio; later occurrences belong to the audio data itself.
                self.fp.write(payload.split(self._AUDIO_MARKER, 1)[1])
        elif b'turn.end' in payload:
            self.close()
            self.fp.close()

    def closed(self, code, reason=None):
        """Close the output file when the connection ends for any reason."""
        # Covers resets/timeouts where no ``turn.end`` is ever received;
        # closing an already-closed file is a no-op.
        self.fp.close()
if __name__ == '__main__':
    # Demo: synthesize a fixed Chinese sentence and save it to disk.
    url = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4'
    text = '浙江温州,浙江温州,最大皮革厂,江南皮革厂倒闭了!老板黄鹤吃喝嫖赌,欠下了3.5个亿,带着他的小姨子跑了。我们没有办法,拿着钱包抵工资。原价都是三百多、二百多、一百多的钱包,通通二十块,通通二十块!黄鹤你不是人,我们辛辛苦苦给你干了大半年,你不发工资,你还我血汗钱,还我血汗钱!'
    # The client requests an MP3 output format, so give the file a matching
    # extension; players that trust the extension cannot decode it as WAV.
    filename = '/tmp/test.mp3'
    ws = WSClient(url, text, filename)
    try:
        ws.connect()
        ws.run_forever()
    except KeyboardInterrupt:
        # Shut the connection down cleanly when interrupted with Ctrl-C.
        ws.close()
@ODtian
Copy link

ODtian commented Apr 4, 2020

声音和浏览器上还是有区别,看了一下是16khz 32kbps的,但是说应该是audio-24khz-48kbitrate-mono-mp3这个的,可能是播放器问题,希望能够一起讨论一下,邮箱tianluyue2013@gmail.com。直接保存为mp3可以正常播放,不需要头文件了。

@ODtian
Copy link

ODtian commented Apr 6, 2020

已经解决了!

@geekyouth
Copy link

python-3.7 多处报错

@Ansen
Copy link

Ansen commented Jul 20, 2020

已经解决了!

怎么解决的,能分享不?

@Alex-coder251
Copy link

大佬能写成python3版本的吗😭

@Ansen
Copy link

Ansen commented Jul 27, 2020

Python3 版本

#!/usr/bin/env python3
# -*- coding:utf-8 -*-

from ws4py.client.threadedclient import WebSocketClient
import binascii

class WSClient(WebSocketClient):
    """WebSocket client that requests speech for ``text`` and saves the audio.

    On connect it sends a ``speech.config`` message followed by an SSML
    request, then appends the payload of every ``Path:audio`` frame it
    receives to ``filename`` until a ``turn.end`` message arrives.

    No container header is written to the file: the requested
    ``outputFormat`` is an MP3 stream, so prefixing a PCM WAV header (as an
    earlier revision did) only produces a mislabelled file with a wrong
    duration.

    Args:
        url: WebSocket endpoint of the synthesis service.
        text: Text placed inside the SSML ``<prosody>`` element.
            NOTE(review): it is inserted verbatim, so ``&`` or ``<`` in the
            text will yield malformed SSML -- confirm whether callers pass
            plain text or SSML fragments before adding escaping.
        filename: Path of the output audio file (opened in binary mode).
    """

    # Header line that terminates the textual prefix of an audio frame;
    # everything after it is raw audio.
    _AUDIO_MARKER = b'Path:audio\r\n'

    def __init__(self, url, text, filename):
        # Initialise the base client first so an invalid URL cannot leave an
        # opened-but-never-closed output file behind.
        super().__init__(url)
        self.text = text
        self.fp = open(filename, 'wb')

    def opened(self):
        """Send the synthesis configuration and the SSML request."""
        self.send(
            'Content-Type:application/json; charset=utf-8\r\n\r\n'
            'Path:speech.config\r\n\r\n'
            '{"context":{"synthesis":{"audio":{"metadataoptions":'
            '{"sentenceBoundaryEnabled":"false","wordBoundaryEnabled":"true"},'
            '"outputFormat":"audio-24khz-160kbitrate-mono-mp3"}}}}\r\n'
        )
        self.send(
            "X-RequestId:fe83fbefb15c7739fe674d9f3e81d38f\r\n"
            "Content-Type:application/ssml+xml\r\n"
            "Path:ssml\r\n\r\n"
            "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
            "<voice  name='Microsoft Server Speech Text to Speech Voice (zh-CN, XiaoxiaoNeural)'>"
            "<prosody pitch='+0Hz' rate ='+0%' volume='+0%'>"
            + self.text +
            "</prosody></voice></speak>\r\n"
        )

    def received_message(self, m):
        """Append audio payloads to the file; stop when the turn ends."""
        payload = m.data
        # Audio frames are checked first so that audio bytes which happen to
        # contain ``turn.end`` are not mistaken for the end-of-turn message.
        if self._AUDIO_MARKER in payload:
            if not self.fp.closed:
                # maxsplit=1: only the first marker separates header from
                # audio; later occurrences belong to the audio data itself.
                self.fp.write(payload.split(self._AUDIO_MARKER, 1)[1])
        elif b'turn.end' in payload:
            self.close()
            self.fp.close()

    def closed(self, code, reason=None):
        """Close the output file when the connection ends for any reason."""
        # Covers resets/timeouts where no ``turn.end`` is ever received;
        # closing an already-closed file is a no-op.
        self.fp.close()


if __name__ == '__main__':
    # Demo: synthesize a fixed Chinese sentence and save it to disk.
    url = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4'
    text = '浙江温州,浙江温州,最大皮革厂,江南皮革厂倒闭了!老板黄鹤吃喝嫖赌,欠下了3.5个亿,带着他的小姨子跑了。我们没有办法,拿着钱包抵工资。原价都是三百多、二百多、一百多的钱包,通通二十块,通通二十块!黄鹤你不是人,我们辛辛苦苦给你干了大半年,你不发工资,你还我血汗钱,还我血汗钱!'
    # The client requests an MP3 output format, so give the file a matching
    # extension; players that trust the extension cannot decode it as WAV.
    filename = './test.mp3'
    ws = WSClient(url, text, filename)
    try:
        ws.connect()
        ws.run_forever()
    except KeyboardInterrupt:
        # Shut the connection down cleanly when interrupted with Ctrl-C.
        ws.close()

@Alex-coder251
Copy link

Failed to receive data
Traceback (most recent call last):
File "C:\Users\Family\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\LocalCache\local-packages\Python38\site-packages\ws4py\websocket.py", line 394, in once
b = self.sock.recv(self.reading_buffer_size)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.1520.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1226, in recv
return self.read(buflen)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.1520.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1101, in read
return self._sslobj.read(len)
ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。
不知道是怎么回事

Copy link

ghost commented Aug 14, 2020

line 5, in
from ws4py.client.threadedclient import WebSocketClient
ModuleNotFoundError: No module named 'ws4py'

@zzzop
Copy link

zzzop commented Oct 12, 2020

可用,感谢

@zzzop
Copy link

zzzop commented Oct 12, 2020

声音和浏览器上还是有区别,看了一下是16khz 32kbps的,但是说应该是audio-24khz-48kbitrate-mono-mp3这个的,可能是播放器问题,希望能够一起讨论一下,邮箱tianluyue2013@gmail.com。直接保存为mp3可以正常播放,不需要头文件了。

请问如何解决的呢,我的也是16khz 32kbps的

@BeiyanYunyi
Copy link

line 5, in
from ws4py.client.threadedclient import WebSocketClient
ModuleNotFoundError: No module named 'ws4py'

sudo pip install ws4py

@ag2s20150909
Copy link

保存为mp3行了

@Shijiuwei
Copy link

请问大家,在哪里可以获取 wordBoundaryEnabled 对应的词边界位置信息?

@fastfading
Copy link

如何重用一个 wss connection
每次wss connect 都要花费不少时间。
能否用同一个 wss 做多次的tts 请求

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment