Skip to content

Instantly share code, notes, and snippets.

@cyc115
Created May 16, 2014 18:48
Show Gist options
  • Save cyc115/08c95b0f07ae1799ec8f to your computer and use it in GitHub Desktop.
Save cyc115/08c95b0f07ae1799ec8f to your computer and use it in GitHub Desktop.
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package org.chromium.content.browser;
import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.content.pm.PackageManager.NameNotFoundException;
import android.content.pm.ResolveInfo;
import android.content.pm.ServiceInfo;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognitionService;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import org.chromium.base.CalledByNative;
import org.chromium.base.JNINamespace;
import org.chromium.content.browser.SpeechRecognitionError;
import java.util.ArrayList;
import java.util.List;
/**
* This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
* on Android. Using Android's platform recognizer offers several benefits, like good quality and
* good local fallback when no data connection is available.
*/
@JNINamespace("content")
public class SpeechRecognition {
// Constants describing the speech recognition provider we depend on.
private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
private static final int PROVIDER_MIN_VERSION = 300207030;
// We track the recognition state to remember what events we need to send when recognition is
// being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
// events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
// called before.
private static final int STATE_IDLE = 0;
private static final int STATE_AWAITING_SPEECH = 1;
private static final int STATE_CAPTURING_SPEECH = 2;
private int mState;
// The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
// PROVIDER_MIN_VERSION as selected by initialize().
private static ComponentName mRecognitionProvider;
private final Context mContext;
private final Intent mIntent;
private final RecognitionListener mListener;
private SpeechRecognizer mRecognizer;
// Native pointer to C++ SpeechRecognizerImplAndroid.
private int mNativeSpeechRecognizerImplAndroid;
// Remember if we are using continuous recognition.
private boolean mContinuous;
// Internal class to handle events from Android's SpeechRecognizer and route them to native.
class Listener implements RecognitionListener {
@Override
public void onBeginningOfSpeech() {
mState = STATE_CAPTURING_SPEECH;
nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
}
@Override
public void onBufferReceived(byte[] buffer) { }
@Override
public void onEndOfSpeech() {
// Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
// events. The Android API documentation is vague as to when onEndOfSpeech is called in
// continuous mode, whereas the Web Speech API defines a stronger semantic on the
// equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
// event is to trigger it when the last result is received or the session is aborted.
if (!mContinuous) {
nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
// Since Android doesn't have a dedicated event for when audio capture is finished,
// we fire it after speech has ended.
nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
mState = STATE_IDLE;
}
}
@Override
public void onError(int error) {
int code = SpeechRecognitionError.NONE;
// Translate Android SpeechRecognizer errors to Web Speech API errors.
switch(error) {
case SpeechRecognizer.ERROR_AUDIO:
code = SpeechRecognitionError.AUDIO;
break;
case SpeechRecognizer.ERROR_CLIENT:
code = SpeechRecognitionError.ABORTED;
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
code = SpeechRecognitionError.NOT_ALLOWED;
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
case SpeechRecognizer.ERROR_NETWORK:
case SpeechRecognizer.ERROR_SERVER:
code = SpeechRecognitionError.NETWORK;
break;
case SpeechRecognizer.ERROR_NO_MATCH:
code = SpeechRecognitionError.NO_MATCH;
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
code = SpeechRecognitionError.NO_SPEECH;
break;
default:
assert false;
return;
}
terminate(code);
}
@Override
public void onEvent(int event, Bundle bundle) { }
@Override
public void onPartialResults(Bundle bundle) {
handleResults(bundle, true);
}
@Override
public void onReadyForSpeech(Bundle bundle) {
mState = STATE_AWAITING_SPEECH;
nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
}
@Override
public void onResults(Bundle bundle) {
handleResults(bundle, false);
// We assume that onResults is called only once, at the end of a session, thus we
// terminate. If one day the recognition provider changes dictation mode behavior to
// call onResults several times, we should terminate only if (!mContinuous).
terminate(SpeechRecognitionError.NONE);
}
@Override
public void onRmsChanged(float rms) { }
private void handleResults(Bundle bundle, boolean provisional) {
if (mContinuous && provisional) {
// In continuous mode, Android's recognizer sends final results as provisional.
provisional = false;
}
ArrayList<String> list = bundle.getStringArrayList(
SpeechRecognizer.RESULTS_RECOGNITION);
String[] results = list.toArray(new String[list.size()]);
float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
results,
scores,
provisional);
}
}
// This method must be called before any instance of SpeechRecognition can be created. It will
// query Android's package manager to find a suitable speech recognition provider that supports
// continuous recognition.
public static boolean initialize(Context context) {
if (!SpeechRecognizer.isRecognitionAvailable(context))
return false;
PackageManager pm = context.getPackageManager();
Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);
for (ResolveInfo resolve : list) {
ServiceInfo service = resolve.serviceInfo;
if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
continue;
int versionCode;
try {
versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
} catch (NameNotFoundException e) {
continue;
}
if (versionCode < PROVIDER_MIN_VERSION)
continue;
mRecognitionProvider = new ComponentName(service.packageName, service.name);
return true;
}
// If we reach this point, we failed to find a suitable recognition provider.
return false;
}
private SpeechRecognition(final Context context, int nativeSpeechRecognizerImplAndroid) {
mContext = context;
mContinuous = false;
mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
mListener = new Listener();
mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
if (mRecognitionProvider != null) {
mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, mRecognitionProvider);
} else {
// It is possible to force-enable the speech recognition web platform feature (using a
// command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
// provider, in which case the first available speech recognition provider is used.
// Caveat: Continuous mode may not work as expected with a different provider.
mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
}
mRecognizer.setRecognitionListener(mListener);
}
// This function destroys everything when recognition is done, taking care to properly tear
// down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
private void terminate(int error) {
if (mState != STATE_IDLE) {
if (mState == STATE_CAPTURING_SPEECH) {
nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
}
nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
mState = STATE_IDLE;
}
if (error != SpeechRecognitionError.NONE)
nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);
mRecognizer.destroy();
mRecognizer = null;
nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
mNativeSpeechRecognizerImplAndroid = 0;
}
@CalledByNative
private static SpeechRecognition createSpeechRecognition(
Context context, int nativeSpeechRecognizerImplAndroid) {
return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
}
@CalledByNative
private void startRecognition(String language, boolean continuous, boolean interim_results) {
if (mRecognizer == null)
return;
mContinuous = continuous;
mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interim_results);
mRecognizer.startListening(mIntent);
}
@CalledByNative
private void abortRecognition() {
if (mRecognizer == null)
return;
mRecognizer.cancel();
terminate(SpeechRecognitionError.ABORTED);
}
@CalledByNative
private void stopRecognition() {
if (mRecognizer == null)
return;
mContinuous = false;
mRecognizer.stopListening();
}
// Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
private native void nativeOnAudioStart(int nativeSpeechRecognizerImplAndroid);
private native void nativeOnSoundStart(int nativeSpeechRecognizerImplAndroid);
private native void nativeOnSoundEnd(int nativeSpeechRecognizerImplAndroid);
private native void nativeOnAudioEnd(int nativeSpeechRecognizerImplAndroid);
private native void nativeOnRecognitionResults(int nativeSpeechRecognizerImplAndroid,
String[] results,
float[] scores,
boolean provisional);
private native void nativeOnRecognitionError(int nativeSpeechRecognizerImplAndroid, int error);
private native void nativeOnRecognitionEnd(int nativeSpeechRecognizerImplAndroid);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment