teidesu/ym_recognition.py

## ym_recognition.py
"""
This is a small script that (ab)uses Yandex Music Recognition.
I hope the code is self-documenting <3
Notice! The input file should be an .ogg file, preferably encoded with libopus
        (untested with other encoders).
(c) teidesu, 2019. This script is licensed under the GPLv3 license.
"""

import lomond
import uuid as uuid_py


def load_file(name):
    """Read the file at ``name`` as bytes and hand them to ``load_data``.

    Returns whatever ``load_data`` produces for the file's raw contents.
    """
    with open(name, "rb") as source:
        raw = source.read()
    return load_data(raw)


def load_data(raw):
    """Split ``raw`` on the ``OggS`` marker and prepare each piece for sending.

    Whatever precedes the first ``OggS`` marker is dropped. Every remaining
    piece is returned with ``b'\\x00\\x00\\x00\\x01OggS'`` put in front of it.

    A piece containing ``OpusTags`` is rewritten first: everything after the
    12 bytes that start at ``OpusTags`` is replaced with a fixed SpeechKit
    encoder tag, and the piece is zero-padded back to its original length
    (a piece that ends up longer than before is left unpadded).

    Returns a list of ``bytes`` objects, one per piece.
    """
    pages = []
    pieces = raw.split(b'OggS')
    for page in pieces[1:]:
        marker_at = page.find(b'OpusTags')
        if marker_at != -1:
            original_length = len(page)
            # Presumably the 8-byte magic plus a 4-byte length field;
            # confirm against the Opus comment header layout.
            keep = marker_at + 12
            page = page[:keep] + b'#\x00\x00\x00\x00ENCODER=SpeechKit Mobile SDK v3.28.0'
            page = page.ljust(original_length, b"\x00")
        pages.append(b'\x00\x00\x00\x01OggS' + page)
    return pages


def uuid():
    """Return a newly generated random (version 4) UUID as a string."""
    fresh = uuid_py.uuid4()
    return str(fresh)


def _format_match(match):
    """Render a ``match`` payload as ``"<artists> - <title>"``.

    Artists flagged as ``various`` are collapsed into a single ``Various``
    entry; when the artist list is empty, ``Unknown`` is used instead.
    """
    names = []
    for artist in match["artists"]:
        if artist["various"]:
            # Add the placeholder once, no matter how many "various"
            # entries the payload carries.
            if "Various" not in names:
                names.append("Various")
        else:
            names.append(artist["name"])
    return ", ".join(names or ["Unknown"]) + " - " + match["title"]


def recognize(data):
    """Stream audio chunks to the Yandex voice WebSocket and return the match.

    Once the socket is ready, a ``SynchronizeState`` event and a ``Recognize``
    event are sent, followed by the first chunk. Each ``Result`` directive
    from the server triggers sending the next chunk. A ``MusicResult``
    directive whose result is ``success`` ends the exchange.

    Args:
        data: sequence of binary chunks, as produced by ``load_data``.

    Returns:
        ``"<artists> - <title>"`` for a successful match, or ``None`` when
        ``data`` is empty, when all chunks were sent and another ``Result``
        arrived without a match, or when the connection ends first.
    """
    if not data:
        # Nothing to stream; bail out before connecting rather than failing
        # on data[0] once the socket is ready.
        return None

    device = uuid().replace("-", "")
    pos = 0
    ws = lomond.WebSocket("wss://voiceservices.yandex.net/uni.ws")
    for msg in ws.connect():
        if msg.name == "ready":
            ws.send_json({"event": {
                "header": {"messageId": uuid(), "name": "SynchronizeState",
                           "namespace": "System"},
                "payload": {"accept_invalid_auth": True, "auth_token": "5983ba91-339e-443c-8452-390fe7d9d308",
                            "speechkitVersion": "3.28.0", "uuid": device}}})
            ws.send_json({"event": {
                "header": {"messageId": uuid(), "name": "Recognize", "namespace": "ASR",
                           "streamId": 1},
                "payload": {"advancedASROptions": {"manual_punctuation": False, "partial_results": True},
                            "disableAntimatNormalizer": False, "format": "audio/opus", "lang": "en-US",
                            "music_request2": {"headers": {"Content-Type": "audio/opus"}}, "punctuation": False,
                            "tags": "PASS_AUDIO;", "topic": "queries"}}}
            )
            ws.send_binary(data[0])
            pos += 1
        elif msg.name == "text":
            d = msg.json["directive"]
            directive = d["header"]["name"]
            if directive == "Result":
                if pos < len(data):
                    # The server acknowledged the previous chunk; feed the next.
                    ws.send_binary(data[pos])
                    pos += 1
                else:
                    # Every chunk was sent and still no music match arrived.
                    ws.close()
                    return None
            elif directive == "MusicResult" and d["payload"]["result"] == "success":
                ret = _format_match(d["payload"]["data"]["match"])
                ws.close()
                return ret
    return None


if __name__ == '__main__':
    # Recognize sample2.ogg and print the result, falling back to
    # "not found" when recognize() returns nothing.
    found = recognize(load_file("sample2.ogg"))
    print(found or "not found")

## ym_recognition_aio.py
"""
This is a small script that (ab)uses Yandex Music Recognition, but using aiohttp.
I hope the code is self-documenting <3
Notice! The input file should be an .ogg file, preferably encoded with libopus
        (untested with other encoders).
(c) teidesu, 2020. This script is licensed under the GPLv3 license.
"""

import aiohttp
import asyncio
import uuid as uuid_py


def load_file(name):
    """Load the file ``name`` in binary mode and split it via ``load_data``.

    The return value is exactly what ``load_data`` returns for the bytes read.
    """
    with open(name, "rb") as stream:
        contents = stream.read()
    return load_data(contents)


def load_data(raw):
    """Cut ``raw`` into pieces at each ``OggS`` marker, ready to be sent.

    Bytes before the first marker are ignored. Each piece is returned with
    ``b'\\x00\\x00\\x00\\x01OggS'`` prepended. In a piece that contains
    ``OpusTags``, everything past the 12 bytes starting at ``OpusTags`` is
    replaced with a fixed SpeechKit encoder tag, then zero bytes are appended
    until the piece regains its original length (no padding is added if the
    rewritten piece is already at least that long).

    Returns a list of ``bytes`` objects.
    """
    prefix = b'\x00\x00\x00\x01OggS'
    replacement = b'#\x00\x00\x00\x00ENCODER=SpeechKit Mobile SDK v3.28.0'

    def patch(page):
        # Pieces without a tags header are forwarded untouched.
        if b'OpusTags' not in page:
            return page
        cut = page.index(b'OpusTags') + 12
        patched = page[:cut] + replacement
        missing = len(page) - len(patched)
        return patched + b"\x00" * missing

    return [prefix + patch(page) for page in raw.split(b'OggS')[1:]]


def uuid():
    """Generate a random (version 4) UUID and return its string form."""
    identifier = uuid_py.uuid4()
    return str(identifier)


async def recognize(data):
    """Stream audio chunks to the Yandex voice WebSocket and return the match.

    After connecting, a ``SynchronizeState`` event and a ``Recognize`` event
    are sent, followed by the first chunk. Each ``Result`` directive from the
    server triggers sending the next chunk. A ``MusicResult`` directive whose
    result is ``success`` ends the exchange.

    Args:
        data: sequence of binary chunks, as produced by ``load_data``.

    Returns:
        The ``match`` object from the ``MusicResult`` payload on success, or
        ``None`` when ``data`` is empty, when all chunks were sent and another
        ``Result`` arrived without a match, or when the socket reports a
        closed/error state.
    """
    if not data:
        # Nothing to stream; return before connecting to avoid an IndexError
        # on data[0].
        return None

    device = uuid().replace("-", "")
    pos = 0

    # The context managers close the websocket and then the session on every
    # exit path, including exceptions and the closed/error branches below.
    async with aiohttp.ClientSession() as session:
        async with session.ws_connect('wss://voiceservices.yandex.net/uni.ws') as ws:
            await ws.send_json({
                "event": {
                    "header": {
                        "messageId": uuid(),
                        "name": "SynchronizeState",
                        "namespace": "System"
                    },
                    "payload": {
                        "accept_invalid_auth": True,
                        "auth_token": "5983ba91-339e-443c-8452-390fe7d9d308",
                        "speechkitVersion": "3.28.0",
                        "uuid": device
                    }
                }
            })

            await ws.send_json({
                "event": {
                    "header": {
                        "messageId": uuid(),
                        "name": "Recognize",
                        "namespace": "ASR",
                        "streamId": 1
                    },
                    "payload": {
                        "advancedASROptions": {
                            "manual_punctuation": False,
                            "partial_results": True
                        },
                        "disableAntimatNormalizer": False,
                        "format": "audio/opus",
                        "lang": "en-US",
                        "music_request2": {
                            "headers": {
                                "Content-Type": "audio/opus"
                            }
                        },
                        "punctuation": False,
                        "tags": "PASS_AUDIO;",
                        "topic": "queries"
                    }
                }
            })

            await ws.send_bytes(data[0])
            pos += 1

            while True:
                msg = await ws.receive()

                if msg.type == aiohttp.WSMsgType.text:
                    body = msg.json()
                    d = body['directive']
                    directive = d['header']['name']
                    if directive == 'Result':
                        if pos < len(data):
                            # Previous chunk acknowledged; feed the next one.
                            await ws.send_bytes(data[pos])
                            pos += 1
                        else:
                            # Every chunk was sent and no match arrived.
                            return None
                    elif directive == 'MusicResult':
                        if d['payload']['result'] == 'success':
                            return d['payload']['data']['match']
                elif msg.type in (aiohttp.WSMsgType.closed, aiohttp.WSMsgType.error):
                    return None


async def main():
    """Recognize ``sample.ogg`` and print the match, or ``not found``."""
    chunks = load_file("sample.ogg")
    match = await recognize(chunks)
    print(match if match else 'not found')


if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated and leaves the loop unclosed.
    asyncio.run(main())
	"""
	This is a small script that (ab)uses Yandex Music Recognition.
	I hope the code is self-documenting <3
	Notice! The input file should be an .ogg file, preferably encoded with libopus
	(untested with other encoders).
	(c) teidesu, 2019. This script is licensed under the GPLv3 license.
	"""

	import lomond
	import uuid as uuid_py


	def load_file(name):
		"""Read the file at ``name`` as bytes and pass them to ``load_data``.

		Returns whatever ``load_data`` produces for the file's raw contents.
		"""
		with open(name, "rb") as f:
			return load_data(f.read())


	def load_data(raw):
		"""Split ``raw`` on the ``OggS`` marker and prepare each piece for sending.

		Whatever precedes the first marker is dropped. Each piece is returned
		with ``b'\\x00\\x00\\x00\\x01OggS'`` prepended. In a piece containing
		``OpusTags``, everything after the 12 bytes starting at ``OpusTags`` is
		replaced with a fixed SpeechKit encoder tag and the piece is zero-padded
		back to its original length.

		Returns a list of ``bytes`` objects.
		"""
		ret = []
		for chunk in raw.split(b'OggS')[1:]:
			if b'OpusTags' in chunk:
				pos = chunk.index(b'OpusTags') + 12
				size = len(chunk)
				chunk = chunk[:pos] + b'#\x00\x00\x00\x00ENCODER=SpeechKit Mobile SDK v3.28.0'
				# A negative count yields b"", so a longer piece stays unpadded.
				chunk += b"\x00" * (size - len(chunk))
			ret.append(b'\x00\x00\x00\x01OggS' + chunk)
		return ret


	def uuid():
		"""Return a newly generated random (version 4) UUID as a string."""
		return str(uuid_py.uuid4())


	def recognize(data):
		"""Stream audio chunks to the Yandex voice WebSocket and return the match.

		Once the socket is ready, a ``SynchronizeState`` event and a
		``Recognize`` event are sent, followed by the first chunk. Each
		``Result`` directive triggers sending the next chunk. A ``MusicResult``
		directive whose result is ``success`` ends the exchange.

		Args:
			data: sequence of binary chunks, as produced by ``load_data``.

		Returns:
			``"<artists> - <title>"`` for a successful match, or ``None`` when
			``data`` is empty, when all chunks were sent and another ``Result``
			arrived without a match, or when the connection ends first.
		"""
		if not data:
			# Nothing to stream; return before connecting rather than failing
			# on data[0] once the socket is ready.
			return None

		device = uuid().replace("-", "")
		pos = 0
		ws = lomond.WebSocket("wss://voiceservices.yandex.net/uni.ws")
		for msg in ws.connect():
			if msg.name == "ready":
				ws.send_json({"event": {
					"header": {"messageId": uuid(), "name": "SynchronizeState",
							   "namespace": "System"},
					"payload": {"accept_invalid_auth": True, "auth_token": "5983ba91-339e-443c-8452-390fe7d9d308",
								"speechkitVersion": "3.28.0", "uuid": device}}})
				ws.send_json({"event": {
					"header": {"messageId": uuid(), "name": "Recognize", "namespace": "ASR",
							   "streamId": 1},
					"payload": {"advancedASROptions": {"manual_punctuation": False, "partial_results": True},
								"disableAntimatNormalizer": False, "format": "audio/opus", "lang": "en-US",
								"music_request2": {"headers": {"Content-Type": "audio/opus"}}, "punctuation": False,
								"tags": "PASS_AUDIO;", "topic": "queries"}}}
				)
				ws.send_binary(data[0])
				pos += 1
			elif msg.name == "text":
				d = msg.json["directive"]
				if d["header"]["name"] == "Result":
					if pos < len(data):
						# Previous chunk acknowledged; feed the next one.
						ws.send_binary(data[pos])
						pos += 1
					else:
						# Every chunk was sent and no match arrived.
						ws.close()
						return None
				elif d["header"]["name"] == "MusicResult":
					if d["payload"]["result"] == "success":
						p = d["payload"]["data"]["match"]
						art = []
						for a in p["artists"]:
							if a["various"]:
								# Add the placeholder once, however many
								# "various" entries the payload carries.
								if "Various" not in art:
									art.append("Various")
							else:
								art.append(a["name"])
						ret = ", ".join(art or ["Unknown"]) + " - " + p["title"]
						ws.close()
						return ret
		return None


	if __name__ == '__main__':
		# Recognize sample2.ogg and print the result, falling back to
		# "not found" when recognize() returns nothing.
		print(recognize(load_file("sample2.ogg")) or "not found")