Skip to content

Instantly share code, notes, and snippets.

@aylarov
Created June 30, 2017 15:58
Show Gist options
  • Save aylarov/eb696ac8b3d141e173e7d656a9f37d77 to your computer and use it in GitHub Desktop.
Save aylarov/eb696ac8b3d141e173e7d656a9f37d77 to your computer and use it in GitHub Desktop.
Voximplant ASR + API.ai NLP
// Enable ASR module for speech recognition capabilities
require(Modules.ASR);
// Scenario-wide state shared by the handlers below.
// The inbound call being served; assigned when CallAlerting fires.
var call;
// Speech recognition instance; re-created each time the bot starts listening.
var asr;
// Root of the API.ai REST endpoint.
var baseURL = "https://api.api.ai/v1/";
// API.ai access token, sent in the Authorization header of every query.
var accessToken = "PUT YOUR API.AI ACCESS TOKEN HERE";
// Value of the "any" parameter taken from the most recent FoundersIntent response.
var nlp_result;
// Scenario entry point: an inbound call arrives.
// Remember the call in the shared `call` variable, subscribe to its
// Connected / Disconnected events, and only then answer it.
VoxEngine.addEventListener(AppEvents.CallAlerting, function (alertEvent) {
  call = alertEvent.call;
  var incomingCall = call;
  // Connected starts the dialogue
  incomingCall.addEventListener(CallEvents.Connected, handleCallConnected);
  // Disconnected ends the whole session
  incomingCall.addEventListener(CallEvents.Disconnected, VoxEngine.terminate);
  incomingCall.answer();
});
// CallEvents.Connected handler.
// Waits half a second, then speaks the greeting and arranges for
// handleIntroPlayed to run when the greeting playback finishes.
function handleCallConnected(e) {
  var INTRO_DELAY_MS = 500;
  var playIntro = function () {
    call.say("Hi! Voximplant bot at your service, what do you want to know?", Language.US_ENGLISH_FEMALE);
    call.addEventListener(CallEvents.PlaybackFinished, handleIntroPlayed);
  };
  setTimeout(playIntro, INTRO_DELAY_MS);
}
// Start listening to the caller.
// Runs after a prompt has been played (and is also called directly by the
// other handlers to restart the dialogue): creates a fresh recognizer,
// wires its events and routes the call audio into it.
function handleIntroPlayed(e) {
  // Unsubscribe from PlaybackFinished (no handler argument is passed)
  call.removeEventListener(CallEvents.PlaybackFinished);

  // Freeform recognition, US English
  var recognizerOptions = { lang: ASRLanguage.ENGLISH_US };
  asr = VoxEngine.createASR(recognizerOptions);

  // Recognized text goes to handleResult
  asr.addEventListener(ASREvents.Result, handleResult);

  // When audio capture starts, stop whatever is still being played to the caller
  var interruptPlayback = function (asrevent) {
    call.stopPlayback();
  };
  asr.addEventListener(ASREvents.CaptureStarted, interruptPlayback);

  // Feed the call audio to the recognition engine
  call.sendMediaTo(asr);
}
// ASREvents.Result handler.
// Stops the recognizer, logs the recognized text and its confidence, then
// sends the text to the API.ai "query" endpoint; the HTTP response is
// delivered to handleHttp.
// NOTE(review): sessionId is the constant "1" for every request - presumably
// all callers end up sharing one API.ai session; confirm this is intended.
function handleResult(e) {
  asr.stop();
  Logger.write("RESULT: " + e.text);
  Logger.write("CONFIDENCE: " + e.confidence);

  // Build the query string; the recognized text is URL-encoded
  var queryParts = [
    "query=" + encodeURIComponent(e.text),
    "sessionId=1",
    "lang=en"
  ];
  var requestUrl = baseURL + "query?" + queryParts.join("&");

  // The access token travels in the Authorization header
  var requestHeaders = [
    "Content-Type: application/json;charset=utf-8",
    "Authorization: bearer " + accessToken
  ];

  Net.httpRequest(requestUrl, handleHttp, { headers: requestHeaders });
}
// Handle API.ai processing result.
// Callback for the Net.httpRequest issued in handleResult.
//   e.code - HTTP status code of the response
//   e.text - response body (expected to be API.ai JSON)
// Outcomes:
//   - non-200 status, unparsable body, or a body without a "result" object:
//     the "Oops" message is played (playOops);
//   - FoundersIntent with a non-empty company name in parameters.any:
//     the name is stored in nlp_result and looked up through WikiInfobox,
//     whose answer goes to handleWikiParserResult;
//   - anything else: the caller is told the request was not understood and
//     recognition is restarted.
// NOTE(review): WikiInfobox is not defined in this file - presumably it is
// provided by another scenario/module attached to the same application.
function handleHttp(e) {
  // Speak a phrase, then restart recognition once its playback has finished
  function sayThenListen(phrase) {
    call.say(phrase, Language.US_ENGLISH_FEMALE);
    call.addEventListener(CallEvents.PlaybackFinished, function () {
      call.removeEventListener(CallEvents.PlaybackFinished);
      handleIntroPlayed();
    });
  }

  Logger.write(e.code);
  Logger.write(JSON.stringify(e.text));

  if (e.code != 200) {
    // HTTP error - play Oops message
    playOops();
    return;
  }

  // A 200 response may still carry a body that is not JSON; without this
  // guard the exception would leave the caller in silence.
  var res;
  try {
    res = JSON.parse(e.text);
  } catch (parseError) {
    Logger.write("API.ai response is not valid JSON: " + parseError.message);
    playOops();
    return;
  }

  var result = res && res.result;
  if (!result || typeof result !== "object") {
    Logger.write("API.ai response has no result object");
    playOops();
    return;
  }

  // metadata / parameters can be absent, so read them defensively
  var intentName = result.metadata ? result.metadata.intentName : undefined;
  var company = result.parameters ? result.parameters.any : undefined;

  if (intentName == "FoundersIntent" && typeof company === "string" && company !== "") {
    nlp_result = company;
    // Use recognized company name to find founders info at Wikipedia
    WikiInfobox(nlp_result, "en", handleWikiParserResult);
    return;
  }

  // No usable API.ai intent for the request
  sayThenListen("I couldn't understand what you asked about, I can tell only about founders of different companies");
}
// Handle X-wiki parser result.
// Callback passed to WikiInfobox in handleHttp.
//   err  - truthy when the lookup failed; the "Oops" message is played
//   data - parsed infobox; founders are read from data.founder or, if that
//          is absent, data.founders (a single entry or an array of entries)
// An entry contributes a name when it is a link (entry.type == "link", name in
// entry.text) or when it has a plain string entry.value free of wiki markup
// characters ({ } | *). The resulting sentence is spoken to the caller and
// recognition is restarted afterwards. When no usable name remains, the
// caller is told that the info could not be found.
function handleWikiParserResult(err, data) {
  // Speak a phrase, then restart recognition once its playback has finished
  function sayThenListen(phrase) {
    call.say(phrase, Language.US_ENGLISH_FEMALE);
    call.addEventListener(CallEvents.PlaybackFinished, function () {
      call.removeEventListener(CallEvents.PlaybackFinished);
      handleIntroPlayed();
    });
  }

  if (err) {
    playOops();
    return;
  }
  Logger.write(JSON.stringify(data));

  // No strict rules for Wikipedia infoboxes: the field may be called
  // "founder" or "founders" and may hold one entry or a list of them.
  var rawFounders;
  if (data && data.founder !== undefined && data.founder !== null) {
    rawFounders = data.founder;
  } else if (data && data.founders !== undefined && data.founders !== null) {
    rawFounders = data.founders;
  } else {
    // No info found
    sayThenListen("I couldn't find the info");
    return;
  }
  if (!Array.isArray(rawFounders)) rawFounders = [ rawFounders ];

  // Collect speakable names, skipping malformed entries and raw wiki markup
  var names = [];
  for (var i = 0; i < rawFounders.length; i++) {
    var entry = rawFounders[i];
    if (!entry) continue;
    if (entry.type == "link") {
      if (typeof entry.text === "string" && entry.text !== "") names.push(entry.text);
    } else if (typeof entry.value === "string" &&
               entry.value !== "" &&
               !/[{}|*]/.test(entry.value)) {
      names.push(entry.value);
    }
  }

  // Every entry was filtered out - do not speak a sentence without names
  if (names.length === 0) {
    sayThenListen("I couldn't find the info");
    return;
  }

  // Creating the text for TTS. join() puts separators only between names,
  // so nothing trails the last one and commas inside a name are left alone.
  var sentence;
  if (names.length === 1) {
    sentence = "The founder of " + nlp_result + " is " + names[0];
  } else {
    sentence = "The founders of " + nlp_result + " are " +
      names.join(names.length === 2 ? " and " : ", ");
  }

  // Voila!
  sayThenListen(sentence);
}
// Generic failure prompt.
// Tells the caller that the request could not be handled and, once the
// message playback has finished, starts listening again.
function playOops() {
  var resumeListening = function (e) {
    call.removeEventListener(CallEvents.PlaybackFinished);
    handleIntroPlayed();
  };
  call.say("Oops! Sorry, I couldn't handle the request, please try again", Language.US_ENGLISH_FEMALE);
  call.addEventListener(CallEvents.PlaybackFinished, resumeListening);
}
@marcinwrobel12
Copy link

Is it possible to do voice recognition in a conference? I tried, but I couldn't get it to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment