Gist by @lliepmah, created January 24, 2018
Speech Recognition: an Android sample that streams microphone audio to the Google Cloud Speech gRPC API. Four files: MainActivity.java, MessageDialogFragment.java, SpeechService.java, and VoiceRecorder.java.
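// ===== MainActivity.java =====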
import android.Manifest;
import android.content.ComponentName;
import android.content.Intent;
import android.content.ServiceConnection;
import android.content.pm.PackageManager;
import android.content.res.Resources;
import android.os.Bundle;
import android.os.IBinder;
import android.support.annotation.NonNull;
import android.support.v4.app.ActivityCompat;
import android.support.v4.content.res.ResourcesCompat;
import android.support.v7.app.AppCompatActivity;
import android.support.v7.widget.LinearLayoutManager;
import android.support.v7.widget.RecyclerView;
import android.support.v7.widget.Toolbar;
import android.text.TextUtils;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.TextView;
import java.util.ArrayList;
import ru.kodep.speech.R;
public class MainActivity extends AppCompatActivity implements MessageDialogFragment.Listener {
private static final String FRAGMENT_MESSAGE_DIALOG = "message_dialog";
private static final String STATE_RESULTS = "results";
private static final int REQUEST_RECORD_AUDIO_PERMISSION = 1;
private SpeechService mSpeechService;
private VoiceRecorder mVoiceRecorder;
private final VoiceRecorder.Callback mVoiceCallback = new VoiceRecorder.Callback() {
@Override
public void onVoiceStart() {
showStatus(true);
if (mSpeechService != null) {
mSpeechService.startRecognizing(mVoiceRecorder.getSampleRate());
}
}
@Override
public void onVoice(byte[] data, int size) {
if (mSpeechService != null) {
mSpeechService.recognize(data, size);
}
}
@Override
public void onVoiceEnd() {
showStatus(false);
if (mSpeechService != null) {
mSpeechService.finishRecognizing();
}
}
};
// Resource caches
private int mColorHearing;
private int mColorNotHearing;
// View references
private TextView mStatus;
private TextView mText;
private ResultAdapter mAdapter;
private RecyclerView mRecyclerView;
private final ServiceConnection mServiceConnection = new ServiceConnection() {
@Override
public void onServiceConnected(ComponentName componentName, IBinder binder) {
mSpeechService = SpeechService.from(binder);
mSpeechService.addListener(mSpeechServiceListener);
mStatus.setVisibility(View.VISIBLE);
}
@Override
public void onServiceDisconnected(ComponentName componentName) {
mSpeechService = null;
}
};
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
final Resources resources = getResources();
final Resources.Theme theme = getTheme();
mColorHearing = ResourcesCompat.getColor(resources, R.color.status_hearing, theme);
mColorNotHearing = ResourcesCompat.getColor(resources, R.color.status_not_hearing, theme);
setSupportActionBar((Toolbar) findViewById(R.id.toolbar));
mStatus = (TextView) findViewById(R.id.status);
mText = (TextView) findViewById(R.id.text);
mRecyclerView = (RecyclerView) findViewById(R.id.recycler_view);
mRecyclerView.setLayoutManager(new LinearLayoutManager(this));
final ArrayList<String> results = savedInstanceState == null ? null :
savedInstanceState.getStringArrayList(STATE_RESULTS);
mAdapter = new ResultAdapter(results);
mRecyclerView.setAdapter(mAdapter);
}
@Override
protected void onStart() {
super.onStart();
// Prepare Cloud Speech API
bindService(new Intent(this, SpeechService.class), mServiceConnection, BIND_AUTO_CREATE);
// Start listening to voice
if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
== PackageManager.PERMISSION_GRANTED) {
startVoiceRecorder();
} else if (ActivityCompat.shouldShowRequestPermissionRationale(this,
Manifest.permission.RECORD_AUDIO)) {
showPermissionMessageDialog();
} else {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO},
REQUEST_RECORD_AUDIO_PERMISSION);
}
}
@Override
protected void onStop() {
// Stop listening to voice
stopVoiceRecorder();
// Stop the Cloud Speech API; the service may never have connected, so null-check it.
if (mSpeechService != null) {
mSpeechService.removeListener(mSpeechServiceListener);
mSpeechService = null;
}
unbindService(mServiceConnection);
super.onStop();
}
@Override
protected void onSaveInstanceState(Bundle outState) {
super.onSaveInstanceState(outState);
if (mAdapter != null) {
outState.putStringArrayList(STATE_RESULTS, mAdapter.getResults());
}
}
@Override
public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
@NonNull int[] grantResults) {
if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
if (permissions.length == 1 && grantResults.length == 1
&& grantResults[0] == PackageManager.PERMISSION_GRANTED) {
startVoiceRecorder();
} else {
showPermissionMessageDialog();
}
} else {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
}
private void startVoiceRecorder() {
if (mVoiceRecorder != null) {
mVoiceRecorder.stop();
}
mVoiceRecorder = new VoiceRecorder(mVoiceCallback);
mVoiceRecorder.start();
}
private void stopVoiceRecorder() {
if (mVoiceRecorder != null) {
mVoiceRecorder.stop();
mVoiceRecorder = null;
}
}
private void showPermissionMessageDialog() {
MessageDialogFragment
.newInstance(getString(R.string.permission_message))
.show(getSupportFragmentManager(), FRAGMENT_MESSAGE_DIALOG);
}
private void showStatus(final boolean hearingVoice) {
runOnUiThread(new Runnable() {
@Override
public void run() {
mStatus.setTextColor(hearingVoice ? mColorHearing : mColorNotHearing);
}
});
}
@Override
public void onMessageDialogDismissed() {
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO},
REQUEST_RECORD_AUDIO_PERMISSION);
}
private final SpeechService.Listener mSpeechServiceListener =
new SpeechService.Listener() {
@Override
public void onSpeechRecognized(final String text, final boolean isFinal) {
if (isFinal && mVoiceRecorder != null) {
mVoiceRecorder.dismiss();
}
if (mText != null && !TextUtils.isEmpty(text)) {
runOnUiThread(new Runnable() {
@Override
public void run() {
if (isFinal) {
mText.setText(null);
mAdapter.addResult(text);
mRecyclerView.smoothScrollToPosition(0);
} else {
mText.setText(text);
}
}
});
}
}
};
private static class ViewHolder extends RecyclerView.ViewHolder {
TextView text;
ViewHolder(LayoutInflater inflater, ViewGroup parent) {
super(inflater.inflate(R.layout.item_result, parent, false));
text = (TextView) itemView.findViewById(R.id.text);
}
}
private static class ResultAdapter extends RecyclerView.Adapter<ViewHolder> {
private final ArrayList<String> mResults = new ArrayList<>();
ResultAdapter(ArrayList<String> results) {
if (results != null) {
mResults.addAll(results);
}
}
@Override
public ViewHolder onCreateViewHolder(ViewGroup parent, int viewType) {
return new ViewHolder(LayoutInflater.from(parent.getContext()), parent);
}
@Override
public void onBindViewHolder(ViewHolder holder, int position) {
holder.text.setText(mResults.get(position));
}
@Override
public int getItemCount() {
return mResults.size();
}
void addResult(String result) {
mResults.add(0, result);
notifyItemInserted(0);
}
public ArrayList<String> getResults() {
return mResults;
}
}
}
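// ===== MessageDialogFragment.java =====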
import android.app.Dialog;
import android.content.DialogInterface;
import android.os.Bundle;
import android.support.annotation.NonNull;
import android.support.v7.app.AlertDialog;
import android.support.v7.app.AppCompatDialogFragment;
public class MessageDialogFragment extends AppCompatDialogFragment {
public interface Listener {
/**
* Called when the dialog is dismissed.
*/
void onMessageDialogDismissed();
}
private static final String ARG_MESSAGE = "message";
/**
* Creates a new instance of {@link MessageDialogFragment}.
*
* @param message The message to be shown on the dialog.
* @return A newly created dialog fragment.
*/
public static MessageDialogFragment newInstance(String message) {
final MessageDialogFragment fragment = new MessageDialogFragment();
final Bundle args = new Bundle();
args.putString(ARG_MESSAGE, message);
fragment.setArguments(args);
return fragment;
}
@NonNull
@Override
public Dialog onCreateDialog(Bundle savedInstanceState) {
return new AlertDialog.Builder(getContext())
.setMessage(getArguments().getString(ARG_MESSAGE))
// A positive button with a null listener simply dismisses the dialog, so the
// OnDismissListener below fires exactly once whether the user taps OK or cancels.
// Invoking the listener from both callbacks would request the permission twice.
.setPositiveButton(android.R.string.ok, null)
.setOnDismissListener(new DialogInterface.OnDismissListener() {
@Override
public void onDismiss(DialogInterface dialogInterface) {
((Listener) getActivity()).onMessageDialogDismissed();
}
})
.create();
}
}
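// ===== SpeechService.java =====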
import android.app.Service;
import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.os.AsyncTask;
import android.os.Binder;
import android.os.Handler;
import android.os.IBinder;
import android.support.annotation.NonNull;
import android.support.annotation.Nullable;
import android.text.TextUtils;
import android.util.Log;
import com.google.auth.Credentials;
import com.google.auth.oauth2.AccessToken;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognizeRequest;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechGrpc;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import io.grpc.CallOptions;
import io.grpc.Channel;
import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.ClientInterceptors;
import io.grpc.ManagedChannel;
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.grpc.Status;
import io.grpc.StatusException;
import io.grpc.internal.DnsNameResolverProvider;
import io.grpc.okhttp.OkHttpChannelProvider;
import io.grpc.stub.StreamObserver;
import ru.kodep.speech.R;
public class SpeechService extends Service {
public interface Listener {
void onSpeechRecognized(String text, boolean isFinal);
}
private static final String TAG = "SpeechService";
private static final String PREFS = "SpeechService";
private static final String PREF_ACCESS_TOKEN_VALUE = "access_token_value";
private static final String PREF_ACCESS_TOKEN_EXPIRATION_TIME = "access_token_expiration_time";
/** We reuse an access token as long as it remains valid for at least this much longer. */
private static final int ACCESS_TOKEN_EXPIRATION_TOLERANCE = 30 * 60 * 1000; // thirty minutes
/** We refresh the current access token this long before it expires. */
private static final int ACCESS_TOKEN_FETCH_MARGIN = 60 * 1000; // one minute
public static final List<String> SCOPE =
Collections.singletonList("https://www.googleapis.com/auth/cloud-platform");
private static final String HOSTNAME = "speech.googleapis.com";
private static final int PORT = 443;
private final SpeechBinder mBinder = new SpeechBinder();
private final ArrayList<Listener> mListeners = new ArrayList<>();
private volatile AccessTokenTask mAccessTokenTask;
private SpeechGrpc.SpeechStub mApi;
private static Handler mHandler;
private final StreamObserver<StreamingRecognizeResponse> mResponseObserver
= new StreamObserver<StreamingRecognizeResponse>() {
@Override
public void onNext(StreamingRecognizeResponse response) {
String text = null;
boolean isFinal = false;
if (response.getResultsCount() > 0) {
final StreamingRecognitionResult result = response.getResults(0);
isFinal = result.getIsFinal();
if (result.getAlternativesCount() > 0) {
final SpeechRecognitionAlternative alternative = result.getAlternatives(0);
text = alternative.getTranscript();
}
}
if (text != null) {
for (Listener listener : mListeners) {
listener.onSpeechRecognized(text, isFinal);
}
}
}
@Override
public void onError(Throwable t) {
Log.e(TAG, "Error calling the API.", t);
}
@Override
public void onCompleted() {
Log.i(TAG, "API completed.");
}
};
private final StreamObserver<RecognizeResponse> mFileResponseObserver
= new StreamObserver<RecognizeResponse>() {
@Override
public void onNext(RecognizeResponse response) {
String text = null;
if (response.getResultsCount() > 0) {
final SpeechRecognitionResult result = response.getResults(0);
if (result.getAlternativesCount() > 0) {
final SpeechRecognitionAlternative alternative = result.getAlternatives(0);
text = alternative.getTranscript();
}
}
if (text != null) {
for (Listener listener : mListeners) {
listener.onSpeechRecognized(text, true);
}
}
}
@Override
public void onError(Throwable t) {
Log.e(TAG, "Error calling the API.", t);
}
@Override
public void onCompleted() {
Log.i(TAG, "API completed.");
}
};
private StreamObserver<StreamingRecognizeRequest> mRequestObserver;
public static SpeechService from(IBinder binder) {
return ((SpeechBinder) binder).getService();
}
@Override
public void onCreate() {
super.onCreate();
mHandler = new Handler();
fetchAccessToken();
}
@Override
public void onDestroy() {
super.onDestroy();
mHandler.removeCallbacks(mFetchAccessTokenRunnable);
mHandler = null;
// Release the gRPC channel.
if (mApi != null) {
final ManagedChannel channel = (ManagedChannel) mApi.getChannel();
if (channel != null && !channel.isShutdown()) {
try {
channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Log.e(TAG, "Error shutting down the gRPC channel.", e);
}
}
mApi = null;
}
}
private void fetchAccessToken() {
if (mAccessTokenTask != null) {
return;
}
mAccessTokenTask = new AccessTokenTask();
mAccessTokenTask.execute();
}
private String getDefaultLanguageCode() {
final Locale locale = Locale.getDefault();
final StringBuilder language = new StringBuilder(locale.getLanguage());
final String country = locale.getCountry();
if (!TextUtils.isEmpty(country)) {
language.append("-");
language.append(country);
}
return language.toString();
}
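// Illustrative (assumed) outputs: Locale.US -> "en-US", Locale.GERMANY -> "de-DE",
// and a language-only locale such as new Locale("ja") -> "ja".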
@Nullable
@Override
public IBinder onBind(Intent intent) {
return mBinder;
}
public void addListener(@NonNull Listener listener) {
mListeners.add(listener);
}
public void removeListener(@NonNull Listener listener) {
mListeners.remove(listener);
}
/**
* Starts recognizing speech audio.
*
* @param sampleRate The sample rate of the audio.
*/
public void startRecognizing(int sampleRate) {
if (mApi == null) {
Log.w(TAG, "API not ready. Ignoring the request.");
return;
}
// Configure the API
mRequestObserver = mApi.streamingRecognize(mResponseObserver);
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(StreamingRecognitionConfig.newBuilder()
.setConfig(RecognitionConfig.newBuilder()
.setLanguageCode(getDefaultLanguageCode())
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setSampleRateHertz(sampleRate)
.build())
.setInterimResults(true)
.setSingleUtterance(true)
.build())
.build());
}
/**
* Recognizes the speech audio. This method should be called every time a chunk of the byte
* buffer is ready.
*
* @param data The audio data.
* @param size The number of bytes in {@code data} that are actually valid.
*/
public void recognize(byte[] data, int size) {
if (mRequestObserver == null) {
return;
}
// Call the streaming recognition API
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(data, 0, size))
.build());
}
/**
* Finishes recognizing speech audio.
*/
public void finishRecognizing() {
if (mRequestObserver == null) {
return;
}
mRequestObserver.onCompleted();
mRequestObserver = null;
}
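// Intended call sequence from a bound client (an illustrative sketch; "chunk" and
// "chunkSize" stand in for buffers delivered by VoiceRecorder):
//
//   SpeechService service = SpeechService.from(binder);
//   service.startRecognizing(16000);       // opens the streaming call
//   service.recognize(chunk, chunkSize);   // repeat for each audio chunk
//   service.finishRecognizing();           // half-closes; results arrive via Listener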
/**
* Recognizes all audio data from the specified {@link InputStream}.
*
* @param stream The audio data.
*/
public void recognizeInputStream(InputStream stream) {
if (mApi == null) {
Log.w(TAG, "API not ready. Ignoring the request.");
return;
}
try {
mApi.recognize(
RecognizeRequest.newBuilder()
.setConfig(RecognitionConfig.newBuilder()
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build())
.setAudio(RecognitionAudio.newBuilder()
.setContent(ByteString.readFrom(stream))
.build())
.build(),
mFileResponseObserver);
} catch (IOException e) {
Log.e(TAG, "Error loading the input", e);
}
}
private class SpeechBinder extends Binder {
SpeechService getService() {
return SpeechService.this;
}
}
private final Runnable mFetchAccessTokenRunnable = new Runnable() {
@Override
public void run() {
fetchAccessToken();
}
};
private class AccessTokenTask extends AsyncTask<Void, Void, AccessToken> {
@Override
protected AccessToken doInBackground(Void... voids) {
final SharedPreferences prefs =
getSharedPreferences(PREFS, Context.MODE_PRIVATE);
String tokenValue = prefs.getString(PREF_ACCESS_TOKEN_VALUE, null);
long expirationTime = prefs.getLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME, -1);
// Check if the current token is still valid for a while
if (tokenValue != null && expirationTime > 0) {
if (expirationTime
> System.currentTimeMillis() + ACCESS_TOKEN_EXPIRATION_TOLERANCE) {
return new AccessToken(tokenValue, new Date(expirationTime));
}
}
// ***** WARNING *****
// In this sample, we load the credential from a JSON file stored in a raw resource
// folder of this client app. You should never ship a service-account credential in a
// production app. Instead, keep the credential on your server and have the app obtain
// a short-lived access token from there.
// *******************
final InputStream stream = getResources().openRawResource(R.raw.credential);
try {
final GoogleCredentials credentials = GoogleCredentials.fromStream(stream)
.createScoped(SCOPE);
final AccessToken token = credentials.refreshAccessToken();
prefs.edit()
.putString(PREF_ACCESS_TOKEN_VALUE, token.getTokenValue())
.putLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME,
token.getExpirationTime().getTime())
.apply();
return token;
} catch (IOException e) {
Log.e(TAG, "Failed to obtain access token.", e);
}
return null;
}
@Override
protected void onPostExecute(AccessToken accessToken) {
mAccessTokenTask = null;
if (accessToken == null) {
// The token fetch failed; there are no credentials to build the channel with.
return;
}
final ManagedChannel channel = new OkHttpChannelProvider()
.builderForAddress(HOSTNAME, PORT)
.nameResolverFactory(new DnsNameResolverProvider())
.intercept(new GoogleCredentialsInterceptor(new GoogleCredentials(accessToken)
.createScoped(SCOPE)))
.build();
mApi = SpeechGrpc.newStub(channel);
// Schedule the next token fetch: one minute before the expiration time, but never
// sooner than thirty minutes from now.
if (mHandler != null) {
mHandler.postDelayed(mFetchAccessTokenRunnable,
Math.max(accessToken.getExpirationTime().getTime()
- System.currentTimeMillis()
- ACCESS_TOKEN_FETCH_MARGIN, ACCESS_TOKEN_EXPIRATION_TOLERANCE));
}
}
}
/**
* Authenticates the gRPC channel using the specified {@link GoogleCredentials}.
*/
private static class GoogleCredentialsInterceptor implements ClientInterceptor {
private final Credentials mCredentials;
private Metadata mCached;
private Map<String, List<String>> mLastMetadata;
GoogleCredentialsInterceptor(Credentials credentials) {
mCredentials = credentials;
}
@Override
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
final MethodDescriptor<ReqT, RespT> method, CallOptions callOptions,
final Channel next) {
return new ClientInterceptors.CheckedForwardingClientCall<ReqT, RespT>(
next.newCall(method, callOptions)) {
@Override
protected void checkedStart(Listener<RespT> responseListener, Metadata headers)
throws StatusException {
Metadata cachedSaved;
URI uri = serviceUri(next, method);
synchronized (this) {
Map<String, List<String>> latestMetadata = getRequestMetadata(uri);
if (mLastMetadata == null || mLastMetadata != latestMetadata) {
mLastMetadata = latestMetadata;
mCached = toHeaders(mLastMetadata);
}
cachedSaved = mCached;
}
headers.merge(cachedSaved);
delegate().start(responseListener, headers);
}
};
}
/**
* Generate a JWT-specific service URI. The URI is simply an identifier with enough
* information for a service to know that the JWT was intended for it. The URI will
* commonly be verified with a simple string equality check.
*/
private URI serviceUri(Channel channel, MethodDescriptor<?, ?> method)
throws StatusException {
String authority = channel.authority();
if (authority == null) {
throw Status.UNAUTHENTICATED
.withDescription("Channel has no authority")
.asException();
}
// Always use HTTPS, by definition.
final String scheme = "https";
final int defaultPort = 443;
String path = "/" + MethodDescriptor.extractFullServiceName(method.getFullMethodName());
URI uri;
try {
uri = new URI(scheme, authority, path, null, null);
} catch (URISyntaxException e) {
throw Status.UNAUTHENTICATED
.withDescription("Unable to construct service URI for auth")
.withCause(e).asException();
}
// The default port must not be present. Alternative ports should be present.
if (uri.getPort() == defaultPort) {
uri = removePort(uri);
}
return uri;
}
private URI removePort(URI uri) throws StatusException {
try {
return new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), -1 /* port */,
uri.getPath(), uri.getQuery(), uri.getFragment());
} catch (URISyntaxException e) {
throw Status.UNAUTHENTICATED
.withDescription("Unable to construct service URI after removing port")
.withCause(e).asException();
}
}
private Map<String, List<String>> getRequestMetadata(URI uri) throws StatusException {
try {
return mCredentials.getRequestMetadata(uri);
} catch (IOException e) {
throw Status.UNAUTHENTICATED.withCause(e).asException();
}
}
private static Metadata toHeaders(Map<String, List<String>> metadata) {
Metadata headers = new Metadata();
if (metadata != null) {
for (String key : metadata.keySet()) {
Metadata.Key<String> headerKey = Metadata.Key.of(
key, Metadata.ASCII_STRING_MARSHALLER);
for (String value : metadata.get(key)) {
headers.put(headerKey, value);
}
}
}
return headers;
}
}
}
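// ===== VoiceRecorder.java =====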
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.support.annotation.NonNull;
public class VoiceRecorder {
private static final int[] SAMPLE_RATE_CANDIDATES = new int[]{16000, 11025, 22050, 44100};
private static final int CHANNEL = AudioFormat.CHANNEL_IN_MONO;
private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT;
private static final int AMPLITUDE_THRESHOLD = 1500;
private static final int SPEECH_TIMEOUT_MILLIS = 2000;
private static final int MAX_SPEECH_LENGTH_MILLIS = 30 * 1000;
public static abstract class Callback {
/**
* Called when the recorder starts hearing voice.
*/
public void onVoiceStart() {
}
/**
* Called when the recorder is hearing voice.
*
* @param data The audio data in {@link AudioFormat#ENCODING_PCM_16BIT}.
* @param size The size of the actual data in {@code data}.
*/
public void onVoice(byte[] data, int size) {
}
/**
* Called when the recorder stops hearing voice.
*/
public void onVoiceEnd() {
}
}
private final Callback mCallback;
private AudioRecord mAudioRecord;
private Thread mThread;
private byte[] mBuffer;
private final Object mLock = new Object();
/** The timestamp of the last time voice was heard. */
private long mLastVoiceHeardMillis = Long.MAX_VALUE;
/** The timestamp at which the current utterance started. */
private long mVoiceStartedMillis;
public VoiceRecorder(@NonNull Callback callback) {
mCallback = callback;
}
/**
* Starts recording audio.
*
* <p>The caller is responsible for calling {@link #stop()} later.</p>
*/
public void start() {
// Stop recording if it is currently ongoing.
stop();
// Try to create a new recording session.
mAudioRecord = createAudioRecord();
if (mAudioRecord == null) {
throw new RuntimeException("Cannot instantiate VoiceRecorder");
}
// Start recording.
mAudioRecord.startRecording();
// Start processing the captured audio.
mThread = new Thread(new ProcessVoice());
mThread.start();
}
/**
* Stops recording audio.
*/
public void stop() {
synchronized (mLock) {
dismiss();
if (mThread != null) {
mThread.interrupt();
mThread = null;
}
if (mAudioRecord != null) {
mAudioRecord.stop();
mAudioRecord.release();
mAudioRecord = null;
}
mBuffer = null;
}
}
/**
* Dismisses the currently ongoing utterance.
*/
public void dismiss() {
if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
mLastVoiceHeardMillis = Long.MAX_VALUE;
mCallback.onVoiceEnd();
}
}
/**
* Retrieves the sample rate currently used to record audio.
*
* @return The sample rate of recorded audio.
*/
public int getSampleRate() {
if (mAudioRecord != null) {
return mAudioRecord.getSampleRate();
}
return 0;
}
/**
* Creates a new {@link AudioRecord}.
*
* @return A newly created {@link AudioRecord}, or null if it cannot be created (missing
* permissions?).
*/
private AudioRecord createAudioRecord() {
for (int sampleRate : SAMPLE_RATE_CANDIDATES) {
final int sizeInBytes = AudioRecord.getMinBufferSize(sampleRate, CHANNEL, ENCODING);
if (sizeInBytes == AudioRecord.ERROR_BAD_VALUE) {
continue;
}
final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
sampleRate, CHANNEL, ENCODING, sizeInBytes);
if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
mBuffer = new byte[sizeInBytes];
return audioRecord;
} else {
audioRecord.release();
}
}
return null;
}
/**
* Continuously processes the captured audio and notifies {@link #mCallback} of corresponding
* events.
*/
private class ProcessVoice implements Runnable {
@Override
public void run() {
while (true) {
synchronized (mLock) {
if (Thread.currentThread().isInterrupted()) {
break;
}
final int size = mAudioRecord.read(mBuffer, 0, mBuffer.length);
final long now = System.currentTimeMillis();
if (isHearingVoice(mBuffer, size)) {
if (mLastVoiceHeardMillis == Long.MAX_VALUE) {
mVoiceStartedMillis = now;
mCallback.onVoiceStart();
}
mCallback.onVoice(mBuffer, size);
mLastVoiceHeardMillis = now;
if (now - mVoiceStartedMillis > MAX_SPEECH_LENGTH_MILLIS) {
end();
}
} else if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
mCallback.onVoice(mBuffer, size);
if (now - mLastVoiceHeardMillis > SPEECH_TIMEOUT_MILLIS) {
end();
}
}
}
}
}
private void end() {
mLastVoiceHeardMillis = Long.MAX_VALUE;
mCallback.onVoiceEnd();
}
private boolean isHearingVoice(byte[] buffer, int size) {
for (int i = 0; i < size - 1; i += 2) {
// The buffer holds 16-bit LINEAR16 samples in little-endian order; combine
// |high byte| << 8 with |low byte| to approximate the sample's absolute amplitude.
int s = buffer[i + 1];
if (s < 0) s *= -1;
s <<= 8;
s += Math.abs(buffer[i]);
if (s > AMPLITUDE_THRESHOLD) {
return true;
}
}
return false;
}
}
}
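For context, a minimal sketch of using VoiceRecorder on its own, assuming the RECORD_AUDIO permission has already been granted (the callback bodies are placeholders):

VoiceRecorder recorder = new VoiceRecorder(new VoiceRecorder.Callback() {
    @Override public void onVoiceStart() { /* open a recognition stream */ }
    @Override public void onVoice(byte[] data, int size) { /* feed the recognizer */ }
    @Override public void onVoiceEnd() { /* close the stream */ }
});
recorder.start();
// ...later, for example from Activity.onStop():
recorder.stop();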