Skip to content

Instantly share code, notes, and snippets.

@samehmohamed88
Created January 13, 2018 12:12
Show Gist options
  • Save samehmohamed88/24c0df21cdd34d62a0e1585bc10d1417 to your computer and use it in GitHub Desktop.
Save samehmohamed88/24c0df21cdd34d62a0e1585bc10d1417 to your computer and use it in GitHub Desktop.
// g++ -O3 -std=c++11 -o ps_boilerplate mic.cpp \
-DMODELDIR=\"`pkg-config --variable=modeldir pocketsphinx`\" \
`pkg-config --cflags --libs pocketsphinx sphinxbase`
#include <iostream>
#include <string>
#include <pocketsphinx.h>
#include <sphinxbase/ad.h>
#include <sphinxbase/err.h>
using namespace std;
void recognize_from_microphone();
ps_decoder_t *ps; // create pocketsphinx decoder structure
cmd_ln_t *config; // create configuration structure
ad_rec_t *ad; // create audio recording structure - for use with ALSA functions
int16 adbuf[4096]; // buffer array to hold audio data
uint8 utt_started, in_speech; // flags for tracking active speech - has speech started? - is speech currently happening?
int32 k; // holds the number of frames in the audio buffer
// char const *hyp; // pointer to "hypothesis" (best guess at the decoded result)
int main(int argc, char *argv[]) {
config = cmd_ln_init(NULL, ps_args(), TRUE, // Load the configuration structure - ps_args() passes the default values
"-hmm", MODELDIR "/en-us/en-us",
"-kws", "./keyphrase.file",
"-dict", MODELDIR "/en-us/cmudict-en-us.dict",
"-logfn", "/dev/null", // suppress log info from being sent to screen
NULL);
ps = ps_init(config); // initialize the pocketsphinx decoder
ad = ad_open_dev("sysdefault", (int) cmd_ln_float32_r(config, "-samprate")); // open default microphone at default samplerate
while(1){
// string decoded_speech = recognize_from_microphone(); // call the function to capture and decode speech
// cout << "Decoded Speech: "<< decoded_speech << "\n" <<endl; // send decoded speech to screen
cout << "calling recognize_from_microphone " <<endl;
recognize_from_microphone(); // call the function to capture and decode speech
}
ad_close(ad); // close the microphone
}
void recognize_from_microphone(){
const char* hyp;
string wakeup("zane");
ad_start_rec(ad); // start recording
ps_start_utt(ps); // mark the start of the utterance
utt_started = FALSE; // clear the utt_started flag
while(1) {
k = ad_read(ad, adbuf, 4096); // capture the number of frames in the audio buffer
ps_process_raw(ps, adbuf, k, FALSE, FALSE); // send the audio buffer to the pocketsphinx decoder
in_speech = ps_get_in_speech(ps); // test to see if speech is being detected
if (in_speech && !utt_started) { // if speech has started and utt_started flag is false
utt_started = TRUE; // then set the flag
}
if (!in_speech && utt_started) { // if speech has ended and the utt_started flag is true
ps_end_utt(ps); // then mark the end of the utterance
ad_stop_rec(ad); // stop recording
hyp = ps_get_hyp(ps, NULL ); // query pocketsphinx for "hypothesis" of decoded statement
// return hyp; // the function returns the hypothesis
// cout << "Decoded Speech: "<< hyp << "\n" <<endl; // send decoded speech to screen
if (hyp != NULL)
{
string str(hyp);
if (str.compare(wakeup) != 0) {
// printf("Recognized: %s\n", hyp);
cout << "Wake up word detected" << endl;
}
}
break; // exit the while loop and return to main
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment