Created
June 23, 2015 07:29
-
-
Save KzoNag/bfe8c0f07335ce2268cc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using UnityEngine;
/// <summary>
/// Converts an <see cref="AudioClip"/> to 16 kHz / mono / 16-bit PCM WAVE data and posts it
/// to the Google Speech API (v2). The request is polled in <c>Update</c>; once
/// <see cref="State"/> becomes <see cref="RecognizeState.Done"/>, the raw response is
/// available through <see cref="Result"/> and <see cref="Error"/>.
/// Call <see cref="Reset"/> before issuing another request.
/// </summary>
public class VoiceRecognizer : MonoBehaviour
{
    /// <summary>Lifecycle of a recognition request.</summary>
    public enum RecognizeState
    {
        /// <summary>Idle; a request may be started.</summary>
        Stay,
        /// <summary>A request has been sent and has not completed yet.</summary>
        Doing,
        /// <summary>The request completed (successfully or with an error).</summary>
        Done
    }

    // Audio format requested from the converter and announced to the API.
    private const int needFrequency = 16000;
    private const int needChannels = 1;

    // WAVE layout constants used by CreateWaveData.
    private const short BitsPerSample = 16;
    private const int FmtChunkSize = 16;
    private const short PcmFormatId = 1;
    private const int WaveHeaderSize = 44;
    // Bytes counted by the RIFF size field besides the sample data ("WAVE" .. data size).
    private const int RiffSizeOverhead = 36;

    [SerializeField]
    private bool executeOnAwake = false;

    [SerializeField]
    private string googleApiKey;

    [SerializeField]
    private AudioClip targetClip;

    private RecognizeState state = RecognizeState.Stay;
    private WWW www;

    // True when ConvertedClip was created by this component (and therefore must be
    // destroyed by it); false when it is simply the caller's clip passed through.
    private bool ownsConvertedClip;

    /// <summary>API key appended to the request URL.</summary>
    public string GoogleApiKey { get { return googleApiKey; } }

    /// <summary>Clip that is (or will be) sent for recognition.</summary>
    public AudioClip TargetClip { get { return targetClip; } }

    /// <summary>Clip after conversion to the required format; null until a request is started.</summary>
    public AudioClip ConvertedClip { get; private set; }

    /// <summary>WAVE bytes sent to the API; null until a request is started.</summary>
    public byte[] WaveData { get; private set; }

    /// <summary>Response body once the request is done, otherwise null.</summary>
    public string Result { get { return (state == RecognizeState.Done) ? www.text : null; } }

    /// <summary>Request error once the request is done, otherwise null.</summary>
    public string Error { get { return (state == RecognizeState.Done) ? www.error : null; } }

    /// <summary>Current request state.</summary>
    public RecognizeState State { get { return state; } }

    void Awake()
    {
        if (executeOnAwake)
        {
            ExecuteRecognize();
        }
    }

    void Update()
    {
        // Poll the pending request; call LogResult() here to trace responses while debugging.
        if (state == RecognizeState.Doing && www.isDone)
        {
            state = RecognizeState.Done;
        }
    }

    /// <summary>
    /// Returns the recognizer to the idle state and releases the previous request's data.
    /// </summary>
    /// <returns>False if a request is still in flight (nothing is changed), true otherwise.</returns>
    /// <remarks>
    /// NOTE(review): Unity also uses <c>Reset</c> as an editor message name on MonoBehaviours;
    /// the name is kept because it is part of the public interface.
    /// </remarks>
    public bool Reset()
    {
        if (state == RecognizeState.Doing)
        {
            return false;
        }

        if (www != null)
        {
            www.Dispose();
            www = null;
        }

        // Only destroy clips created by the converter; never the caller's own clip.
        if (ownsConvertedClip && ConvertedClip != null)
        {
            Destroy(ConvertedClip);
        }

        ownsConvertedClip = false;
        ConvertedClip = null;
        WaveData = null;
        state = RecognizeState.Stay;
        return true;
    }

    /// <summary>
    /// Starts a recognition request for the given clip and API key.
    /// </summary>
    /// <returns>True if the request was started, false otherwise.</returns>
    /// <param name="_targetClip">AudioClip containing the speech to recognize.</param>
    /// <param name="_googleApiKey">Google API key.</param>
    public bool ExecuteRecognize(AudioClip _targetClip, string _googleApiKey)
    {
        // Do not overwrite the clip/key of a request that is in flight or not yet reset.
        if (state != RecognizeState.Stay)
        {
            return false;
        }

        targetClip = _targetClip;
        googleApiKey = _googleApiKey;
        return ExecuteRecognize();
    }

    /// <summary>
    /// Starts a recognition request using the currently assigned clip and API key.
    /// </summary>
    /// <returns>
    /// True if the request was started; false if the recognizer is not idle, or the clip
    /// is missing or empty, or the API key is missing.
    /// </returns>
    public bool ExecuteRecognize()
    {
        if (state != RecognizeState.Stay || targetClip == null || string.IsNullOrEmpty(googleApiKey))
        {
            return false;
        }

        // An empty clip cannot be converted or recognized.
        if (targetClip.samples <= 0)
        {
            return false;
        }

        // Convert the clip to the format the API accepts.
        ConvertedClip = ConvertSamplingAudioClip(targetClip, needChannels, needFrequency);
        ownsConvertedClip = !object.ReferenceEquals(ConvertedClip, targetClip);

        // Build WAVE data from the converted clip.
        WaveData = CreateWaveData(ConvertedClip);

        // Send the WAVE data to the recognition endpoint.
        RequestSpeechAPI(WaveData, needFrequency);

        state = RecognizeState.Doing;
        return true;
    }

    /// <summary>
    /// Converts a clip to the given channel count and sampling rate by picking the nearest
    /// earlier source frame for every output frame (no interpolation or filtering).
    /// Output channels beyond the source's channel count reuse the last source channel.
    /// </summary>
    /// <returns>The source clip itself when it already matches, otherwise a new clip.</returns>
    AudioClip ConvertSamplingAudioClip(AudioClip src, int channels, int frequency)
    {
        // Already in the requested format: return it unchanged.
        if (src.frequency == frequency && src.channels == channels)
        {
            return src;
        }

        int srcChannels = src.channels;
        int srcSamples = src.samples;
        int srcFrequency = src.frequency;

        float[] srcData = new float[srcChannels * srcSamples];
        src.GetData(srcData, 0);

        // Frames per channel in the output, rounded up. Integer math avoids the rounding
        // error of going through the float clip length.
        int destSamples = (int)(((long)srcSamples * frequency + srcFrequency - 1) / srcFrequency);
        float[] destData = new float[destSamples * channels];

        for (int destSample = 0; destSample < destSamples; ++destSample)
        {
            // Source frame used for this output frame, clamped so it never runs past the end.
            long wanted = (long)destSample * srcFrequency / frequency;
            int srcSample = (int)Math.Min(wanted, (long)(srcSamples - 1));

            int srcOffset = srcSample * srcChannels;
            int destOffset = destSample * channels;

            for (int channel = 0; channel < channels; ++channel)
            {
                int srcChannel = (channel < srcChannels) ? channel : srcChannels - 1;
                destData[destOffset + channel] = srcData[srcOffset + srcChannel];
            }
        }

        // The length argument is the number of frames per channel, not the interleaved length.
        AudioClip dest = AudioClip.Create("converted", destSamples, channels, frequency, false, false);
        dest.SetData(destData, 0);
        return dest;
    }

    /// <summary>
    /// Builds a 16-bit PCM WAVE file image (44-byte header followed by the samples)
    /// from the clip's sample data.
    /// </summary>
    byte[] CreateWaveData(AudioClip clip)
    {
        short channels = (short)clip.channels;
        int frequency = clip.frequency;
        int bytesPerSample = BitsPerSample / 8;
        short blockSize = (short)(bytesPerSample * channels);
        int byteRate = frequency * blockSize;

        float[] rawData = new float[clip.channels * clip.samples];
        clip.GetData(rawData, 0);
        int dataSize = rawData.Length * bytesPerSample;

        // BinaryWriter always emits little-endian values, which is what WAVE requires,
        // independent of the platform's native byte order.
        using (var stream = new MemoryStream(WaveHeaderSize + dataSize))
        using (var writer = new BinaryWriter(stream))
        {
            writer.Write(Encoding.ASCII.GetBytes("RIFF"));
            writer.Write(dataSize + RiffSizeOverhead);
            writer.Write(Encoding.ASCII.GetBytes("WAVE"));

            writer.Write(Encoding.ASCII.GetBytes("fmt "));
            writer.Write(FmtChunkSize);
            writer.Write(PcmFormatId);
            writer.Write(channels);
            writer.Write(frequency);
            writer.Write(byteRate);
            writer.Write(blockSize);
            writer.Write(BitsPerSample);

            writer.Write(Encoding.ASCII.GetBytes("data"));
            writer.Write(dataSize);

            foreach (float sample in rawData)
            {
                writer.Write(ToPcm16(sample));
            }

            writer.Flush();
            return stream.ToArray();
        }
    }

    /// <summary>
    /// Encodes one float sample as little-endian PCM bytes.
    /// </summary>
    /// <param name="f">Sample value; values outside [-1, 1] are clamped.</param>
    /// <param name="bitSize">8 (unsigned) or 16 (signed); any other size yields an empty array.</param>
    byte[] GetBytes(float f, int bitSize)
    {
        if (bitSize == 8)
        {
            return new byte[] { ToPcm8(f) };
        }

        if (bitSize == 16)
        {
            short val = ToPcm16(f);
            return new byte[] { (byte)(val & 0xFF), (byte)((val >> 8) & 0xFF) };
        }

        return new byte[0];
    }

    /// <summary>Converts a float sample to signed 16-bit PCM, clamping to avoid wrap-around.</summary>
    static short ToPcm16(float sample)
    {
        return (short)(Mathf.Clamp(sample, -1f, 1f) * short.MaxValue);
    }

    /// <summary>Converts a float sample to unsigned 8-bit PCM, where 128 represents silence.</summary>
    static byte ToPcm8(float sample)
    {
        return (byte)(Mathf.RoundToInt(Mathf.Clamp(sample, -1f, 1f) * 127f) + 128);
    }

    /// <summary>
    /// Writes WAVE bytes to a file, creating it or overwriting an existing one.
    /// </summary>
    /// <param name="waveData">Wave data.</param>
    /// <param name="path">Path.</param>
    void SaveWaveFile(byte[] waveData, string path)
    {
        // File.WriteAllBytes closes the file handle even when the write fails.
        File.WriteAllBytes(path, waveData);
    }

    /// <summary>
    /// Posts the WAVE data to the Google Speech API and stores the pending request in <c>www</c>.
    /// </summary>
    void RequestSpeechAPI(byte[] waveData, int frequency)
    {
        // Escape the key so characters that are special in a query string cannot break the URL.
        var url = "https://www.google.com/speech-api/v2/recognize?output=json&lang=ja&key="
            + Uri.EscapeDataString(googleApiKey);

        // NOTE(review): the body includes the RIFF header while the content type declares
        // raw L16 audio - confirm the endpoint tolerates the leading header bytes.
        var headers = new Dictionary<string, string>
        {
            { "Method", "POST" },
            { "Content-Type", "audio/l16; rate=" + frequency.ToString(CultureInfo.InvariantCulture) },
            { "Content-Length", waveData.Length.ToString(CultureInfo.InvariantCulture) },
            { "Accept", "application/json" }
        };

        www = new WWW(url, waveData, headers);
    }

    /// <summary>
    /// Logs the current request outcome: missing request, still running, error, or response text.
    /// </summary>
    void LogResult()
    {
        if (www == null)
        {
            Logger.LogError("www == null");
        }
        else if (!www.isDone)
        {
            Logger.LogError("www is doing");
        }
        else if (!string.IsNullOrEmpty(www.error))
        {
            Logger.LogError("[Error]" + www.error);
        }
        else
        {
            Logger.Log("[Success]" + www.text);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment