Created
January 24, 2018 12:37
-
-
Save lliepmah/1ec330fe516375b675f2407976336475 to your computer and use it in GitHub Desktop.
Speech Recognition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import android.Manifest; | |
import android.content.ComponentName; | |
import android.content.Intent; | |
import android.content.ServiceConnection; | |
import android.content.pm.PackageManager; | |
import android.content.res.Resources; | |
import android.os.Bundle; | |
import android.os.IBinder; | |
import android.support.annotation.NonNull; | |
import android.support.v4.app.ActivityCompat; | |
import android.support.v4.content.res.ResourcesCompat; | |
import android.support.v7.app.AppCompatActivity; | |
import android.support.v7.widget.LinearLayoutManager; | |
import android.support.v7.widget.RecyclerView; | |
import android.support.v7.widget.Toolbar; | |
import android.text.TextUtils; | |
import android.view.LayoutInflater; | |
import android.view.View; | |
import android.view.ViewGroup; | |
import android.widget.TextView; | |
import java.util.ArrayList; | |
import ru.kodep.speech.R; | |
public class MainActivity extends AppCompatActivity implements MessageDialogFragment.Listener { | |
private static final String FRAGMENT_MESSAGE_DIALOG = "message_dialog"; | |
private static final String STATE_RESULTS = "results"; | |
private static final int REQUEST_RECORD_AUDIO_PERMISSION = 1; | |
private SpeechService mSpeechService; | |
private VoiceRecorder mVoiceRecorder; | |
private final VoiceRecorder.Callback mVoiceCallback = new VoiceRecorder.Callback() { | |
@Override | |
public void onVoiceStart() { | |
showStatus(true); | |
if (mSpeechService != null) { | |
mSpeechService.startRecognizing(mVoiceRecorder.getSampleRate()); | |
} | |
} | |
@Override | |
public void onVoice(byte[] data, int size) { | |
if (mSpeechService != null) { | |
mSpeechService.recognize(data, size); | |
} | |
} | |
@Override | |
public void onVoiceEnd() { | |
showStatus(false); | |
if (mSpeechService != null) { | |
mSpeechService.finishRecognizing(); | |
} | |
} | |
}; | |
// Resource caches | |
private int mColorHearing; | |
private int mColorNotHearing; | |
// View references | |
private TextView mStatus; | |
private TextView mText; | |
private ResultAdapter mAdapter; | |
private RecyclerView mRecyclerView; | |
private final ServiceConnection mServiceConnection = new ServiceConnection() { | |
@Override | |
public void onServiceConnected(ComponentName componentName, IBinder binder) { | |
mSpeechService = SpeechService.from(binder); | |
mSpeechService.addListener(mSpeechServiceListener); | |
mStatus.setVisibility(View.VISIBLE); | |
} | |
@Override | |
public void onServiceDisconnected(ComponentName componentName) { | |
mSpeechService = null; | |
} | |
}; | |
@Override | |
protected void onCreate(Bundle savedInstanceState) { | |
super.onCreate(savedInstanceState); | |
setContentView(R.layout.activity_main); | |
final Resources resources = getResources(); | |
final Resources.Theme theme = getTheme(); | |
mColorHearing = ResourcesCompat.getColor(resources, R.color.status_hearing, theme); | |
mColorNotHearing = ResourcesCompat.getColor(resources, R.color.status_not_hearing, theme); | |
setSupportActionBar((Toolbar) findViewById(R.id.toolbar)); | |
mStatus = (TextView) findViewById(R.id.status); | |
mText = (TextView) findViewById(R.id.text); | |
mRecyclerView = (RecyclerView) findViewById(R.id.recycler_view); | |
mRecyclerView.setLayoutManager(new LinearLayoutManager(this)); | |
final ArrayList<String> results = savedInstanceState == null ? null : | |
savedInstanceState.getStringArrayList(STATE_RESULTS); | |
mAdapter = new ResultAdapter(results); | |
mRecyclerView.setAdapter(mAdapter); | |
} | |
@Override | |
protected void onStart() { | |
super.onStart(); | |
// Prepare Cloud Speech API | |
bindService(new Intent(this, SpeechService.class), mServiceConnection, BIND_AUTO_CREATE); | |
// Start listening to voices | |
if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) | |
== PackageManager.PERMISSION_GRANTED) { | |
startVoiceRecorder(); | |
} else if (ActivityCompat.shouldShowRequestPermissionRationale(this, | |
Manifest.permission.RECORD_AUDIO)) { | |
showPermissionMessageDialog(); | |
} else { | |
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, | |
REQUEST_RECORD_AUDIO_PERMISSION); | |
} | |
} | |
@Override | |
protected void onStop() { | |
// Stop listening to voice | |
stopVoiceRecorder(); | |
// Stop Cloud Speech API | |
mSpeechService.removeListener(mSpeechServiceListener); | |
unbindService(mServiceConnection); | |
mSpeechService = null; | |
super.onStop(); | |
} | |
@Override | |
protected void onSaveInstanceState(Bundle outState) { | |
super.onSaveInstanceState(outState); | |
if (mAdapter != null) { | |
outState.putStringArrayList(STATE_RESULTS, mAdapter.getResults()); | |
} | |
} | |
@Override | |
public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, | |
@NonNull int[] grantResults) { | |
if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) { | |
if (permissions.length == 1 && grantResults.length == 1 | |
&& grantResults[0] == PackageManager.PERMISSION_GRANTED) { | |
startVoiceRecorder(); | |
} else { | |
showPermissionMessageDialog(); | |
} | |
} else { | |
super.onRequestPermissionsResult(requestCode, permissions, grantResults); | |
} | |
} | |
private void startVoiceRecorder() { | |
if (mVoiceRecorder != null) { | |
mVoiceRecorder.stop(); | |
} | |
mVoiceRecorder = new VoiceRecorder(mVoiceCallback); | |
mVoiceRecorder.start(); | |
} | |
private void stopVoiceRecorder() { | |
if (mVoiceRecorder != null) { | |
mVoiceRecorder.stop(); | |
mVoiceRecorder = null; | |
} | |
} | |
private void showPermissionMessageDialog() { | |
MessageDialogFragment | |
.newInstance(getString(R.string.permission_message)) | |
.show(getSupportFragmentManager(), FRAGMENT_MESSAGE_DIALOG); | |
} | |
private void showStatus(final boolean hearingVoice) { | |
runOnUiThread(new Runnable() { | |
@Override | |
public void run() { | |
mStatus.setTextColor(hearingVoice ? mColorHearing : mColorNotHearing); | |
} | |
}); | |
} | |
@Override | |
public void onMessageDialogDismissed() { | |
ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, | |
REQUEST_RECORD_AUDIO_PERMISSION); | |
} | |
private final SpeechService.Listener mSpeechServiceListener = | |
new SpeechService.Listener() { | |
@Override | |
public void onSpeechRecognized(final String text, final boolean isFinal) { | |
if (isFinal) { | |
mVoiceRecorder.dismiss(); | |
} | |
if (mText != null && !TextUtils.isEmpty(text)) { | |
runOnUiThread(new Runnable() { | |
@Override | |
public void run() { | |
if (isFinal) { | |
mText.setText(null); | |
mAdapter.addResult(text); | |
mRecyclerView.smoothScrollToPosition(0); | |
} else { | |
mText.setText(text); | |
} | |
} | |
}); | |
} | |
} | |
}; | |
private static class ViewHolder extends RecyclerView.ViewHolder { | |
TextView text; | |
ViewHolder(LayoutInflater inflater, ViewGroup parent) { | |
super(inflater.inflate(R.layout.item_result, parent, false)); | |
text = (TextView) itemView.findViewById(R.id.text); | |
} | |
} | |
private static class ResultAdapter extends RecyclerView.Adapter<ViewHolder> { | |
private final ArrayList<String> mResults = new ArrayList<>(); | |
ResultAdapter(ArrayList<String> results) { | |
if (results != null) { | |
mResults.addAll(results); | |
} | |
} | |
@Override | |
public ViewHolder onCreateViewHolder(ViewGroup parent, int viewType) { | |
return new ViewHolder(LayoutInflater.from(parent.getContext()), parent); | |
} | |
@Override | |
public void onBindViewHolder(ViewHolder holder, int position) { | |
holder.text.setText(mResults.get(position)); | |
} | |
@Override | |
public int getItemCount() { | |
return mResults.size(); | |
} | |
void addResult(String result) { | |
mResults.add(0, result); | |
notifyItemInserted(0); | |
} | |
public ArrayList<String> getResults() { | |
return mResults; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import android.app.Dialog; | |
import android.content.DialogInterface; | |
import android.os.Bundle; | |
import android.support.annotation.NonNull; | |
import android.support.v7.app.AlertDialog; | |
import android.support.v7.app.AppCompatDialogFragment; | |
public class MessageDialogFragment extends AppCompatDialogFragment { | |
public interface Listener { | |
/** | |
* Called when the dialog is dismissed. | |
*/ | |
void onMessageDialogDismissed(); | |
} | |
private static final String ARG_MESSAGE = "message"; | |
/** | |
* Creates a new instance of {@link MessageDialogFragment}. | |
* | |
* @param message The message to be shown on the dialog. | |
* @return A newly created dialog fragment. | |
*/ | |
public static MessageDialogFragment newInstance(String message) { | |
final MessageDialogFragment fragment = new MessageDialogFragment(); | |
final Bundle args = new Bundle(); | |
args.putString(ARG_MESSAGE, message); | |
fragment.setArguments(args); | |
return fragment; | |
} | |
@NonNull | |
@Override | |
public Dialog onCreateDialog(Bundle savedInstanceState) { | |
return new AlertDialog.Builder(getContext()) | |
.setMessage(getArguments().getString(ARG_MESSAGE)) | |
.setPositiveButton(android.R.string.ok, new DialogInterface.OnClickListener() { | |
@Override | |
public void onClick(DialogInterface dialog, int which) { | |
((Listener) getActivity()).onMessageDialogDismissed(); | |
} | |
}) | |
.setOnDismissListener(new DialogInterface.OnDismissListener() { | |
@Override | |
public void onDismiss(DialogInterface dialogInterface) { | |
((Listener) getActivity()).onMessageDialogDismissed(); | |
} | |
}) | |
.create(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import android.app.Service; | |
import android.content.Context; | |
import android.content.Intent; | |
import android.content.SharedPreferences; | |
import android.os.AsyncTask; | |
import android.os.Binder; | |
import android.os.Handler; | |
import android.os.IBinder; | |
import android.support.annotation.NonNull; | |
import android.support.annotation.Nullable; | |
import android.text.TextUtils; | |
import android.util.Log; | |
import com.google.auth.Credentials; | |
import com.google.auth.oauth2.AccessToken; | |
import com.google.auth.oauth2.GoogleCredentials; | |
import com.google.cloud.speech.v1.RecognitionAudio; | |
import com.google.cloud.speech.v1.RecognitionConfig; | |
import com.google.cloud.speech.v1.RecognizeRequest; | |
import com.google.cloud.speech.v1.RecognizeResponse; | |
import com.google.cloud.speech.v1.SpeechGrpc; | |
import com.google.cloud.speech.v1.SpeechRecognitionAlternative; | |
import com.google.cloud.speech.v1.SpeechRecognitionResult; | |
import com.google.cloud.speech.v1.StreamingRecognitionConfig; | |
import com.google.cloud.speech.v1.StreamingRecognitionResult; | |
import com.google.cloud.speech.v1.StreamingRecognizeRequest; | |
import com.google.cloud.speech.v1.StreamingRecognizeResponse; | |
import com.google.protobuf.ByteString; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.net.URI; | |
import java.net.URISyntaxException; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Date; | |
import java.util.List; | |
import java.util.Locale; | |
import java.util.Map; | |
import java.util.concurrent.TimeUnit; | |
import io.grpc.CallOptions; | |
import io.grpc.Channel; | |
import io.grpc.ClientCall; | |
import io.grpc.ClientInterceptor; | |
import io.grpc.ClientInterceptors; | |
import io.grpc.ManagedChannel; | |
import io.grpc.Metadata; | |
import io.grpc.MethodDescriptor; | |
import io.grpc.Status; | |
import io.grpc.StatusException; | |
import io.grpc.internal.DnsNameResolverProvider; | |
import io.grpc.okhttp.OkHttpChannelProvider; | |
import io.grpc.stub.StreamObserver; | |
import ru.kodep.speech.R; | |
public class SpeechService extends Service { | |
public interface Listener { | |
void onSpeechRecognized(String text, boolean isFinal); | |
} | |
private static final String TAG = "SpeechService"; | |
private static final String PREFS = "SpeechService"; | |
private static final String PREF_ACCESS_TOKEN_VALUE = "access_token_value"; | |
private static final String PREF_ACCESS_TOKEN_EXPIRATION_TIME = "access_token_expiration_time"; | |
/** We reuse an access token if its expiration time is longer than this. */ | |
private static final int ACCESS_TOKEN_EXPIRATION_TOLERANCE = 30 * 60 * 1000; // thirty minutes | |
/** We refresh the current access token before it expires. */ | |
private static final int ACCESS_TOKEN_FETCH_MARGIN = 60 * 1000; // one minute | |
public static final List<String> SCOPE = | |
Collections.singletonList("https://www.googleapis.com/auth/cloud-platform"); | |
private static final String HOSTNAME = "speech.googleapis.com"; | |
private static final int PORT = 443; | |
private final SpeechBinder mBinder = new SpeechBinder(); | |
private final ArrayList<Listener> mListeners = new ArrayList<>(); | |
private volatile AccessTokenTask mAccessTokenTask; | |
private SpeechGrpc.SpeechStub mApi; | |
private static Handler mHandler; | |
private final StreamObserver<StreamingRecognizeResponse> mResponseObserver | |
= new StreamObserver<StreamingRecognizeResponse>() { | |
@Override | |
public void onNext(StreamingRecognizeResponse response) { | |
String text = null; | |
boolean isFinal = false; | |
if (response.getResultsCount() > 0) { | |
final StreamingRecognitionResult result = response.getResults(0); | |
isFinal = result.getIsFinal(); | |
if (result.getAlternativesCount() > 0) { | |
final SpeechRecognitionAlternative alternative = result.getAlternatives(0); | |
text = alternative.getTranscript(); | |
} | |
} | |
if (text != null) { | |
for (Listener listener : mListeners) { | |
listener.onSpeechRecognized(text, isFinal); | |
} | |
} | |
} | |
@Override | |
public void onError(Throwable t) { | |
Log.e(TAG, "Error calling the API.", t); | |
} | |
@Override | |
public void onCompleted() { | |
Log.i(TAG, "API completed."); | |
} | |
}; | |
private final StreamObserver<RecognizeResponse> mFileResponseObserver | |
= new StreamObserver<RecognizeResponse>() { | |
@Override | |
public void onNext(RecognizeResponse response) { | |
String text = null; | |
if (response.getResultsCount() > 0) { | |
final SpeechRecognitionResult result = response.getResults(0); | |
if (result.getAlternativesCount() > 0) { | |
final SpeechRecognitionAlternative alternative = result.getAlternatives(0); | |
text = alternative.getTranscript(); | |
} | |
} | |
if (text != null) { | |
for (Listener listener : mListeners) { | |
listener.onSpeechRecognized(text, true); | |
} | |
} | |
} | |
@Override | |
public void onError(Throwable t) { | |
Log.e(TAG, "Error calling the API.", t); | |
} | |
@Override | |
public void onCompleted() { | |
Log.i(TAG, "API completed."); | |
} | |
}; | |
private StreamObserver<StreamingRecognizeRequest> mRequestObserver; | |
public static SpeechService from(IBinder binder) { | |
return ((SpeechBinder) binder).getService(); | |
} | |
@Override | |
public void onCreate() { | |
super.onCreate(); | |
mHandler = new Handler(); | |
fetchAccessToken(); | |
} | |
@Override | |
public void onDestroy() { | |
super.onDestroy(); | |
mHandler.removeCallbacks(mFetchAccessTokenRunnable); | |
mHandler = null; | |
// Release the gRPC channel. | |
if (mApi != null) { | |
final ManagedChannel channel = (ManagedChannel) mApi.getChannel(); | |
if (channel != null && !channel.isShutdown()) { | |
try { | |
channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); | |
} catch (InterruptedException e) { | |
Log.e(TAG, "Error shutting down the gRPC channel.", e); | |
} | |
} | |
mApi = null; | |
} | |
} | |
private void fetchAccessToken() { | |
if (mAccessTokenTask != null) { | |
return; | |
} | |
mAccessTokenTask = new AccessTokenTask(); | |
mAccessTokenTask.execute(); | |
} | |
private String getDefaultLanguageCode() { | |
final Locale locale = Locale.getDefault(); | |
final StringBuilder language = new StringBuilder(locale.getLanguage()); | |
final String country = locale.getCountry(); | |
if (!TextUtils.isEmpty(country)) { | |
language.append("-"); | |
language.append(country); | |
} | |
return language.toString(); | |
} | |
@Nullable | |
@Override | |
public IBinder onBind(Intent intent) { | |
return mBinder; | |
} | |
public void addListener(@NonNull Listener listener) { | |
mListeners.add(listener); | |
} | |
public void removeListener(@NonNull Listener listener) { | |
mListeners.remove(listener); | |
} | |
/** | |
* Starts recognizing speech audio. | |
* | |
* @param sampleRate The sample rate of the audio. | |
*/ | |
public void startRecognizing(int sampleRate) { | |
if (mApi == null) { | |
Log.w(TAG, "API not ready. Ignoring the request."); | |
return; | |
} | |
// Configure the API | |
mRequestObserver = mApi.streamingRecognize(mResponseObserver); | |
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder() | |
.setStreamingConfig(StreamingRecognitionConfig.newBuilder() | |
.setConfig(RecognitionConfig.newBuilder() | |
.setLanguageCode(getDefaultLanguageCode()) | |
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16) | |
.setSampleRateHertz(sampleRate) | |
.build()) | |
.setInterimResults(true) | |
.setSingleUtterance(true) | |
.build()) | |
.build()); | |
} | |
/** | |
* Recognizes the speech audio. This method should be called every time a chunk of byte buffer | |
* is ready. | |
* | |
* @param data The audio data. | |
* @param size The number of elements that are actually relevant in the {@code data}. | |
*/ | |
public void recognize(byte[] data, int size) { | |
if (mRequestObserver == null) { | |
return; | |
} | |
// Call the streaming recognition API | |
mRequestObserver.onNext(StreamingRecognizeRequest.newBuilder() | |
.setAudioContent(ByteString.copyFrom(data, 0, size)) | |
.build()); | |
} | |
/** | |
* Finishes recognizing speech audio. | |
*/ | |
public void finishRecognizing() { | |
if (mRequestObserver == null) { | |
return; | |
} | |
mRequestObserver.onCompleted(); | |
mRequestObserver = null; | |
} | |
/** | |
* Recognize all data from the specified {@link InputStream}. | |
* | |
* @param stream The audio data. | |
*/ | |
public void recognizeInputStream(InputStream stream) { | |
try { | |
mApi.recognize( | |
RecognizeRequest.newBuilder() | |
.setConfig(RecognitionConfig.newBuilder() | |
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16) | |
.setLanguageCode("en-US") | |
.setSampleRateHertz(16000) | |
.build()) | |
.setAudio(RecognitionAudio.newBuilder() | |
.setContent(ByteString.readFrom(stream)) | |
.build()) | |
.build(), | |
mFileResponseObserver); | |
} catch (IOException e) { | |
Log.e(TAG, "Error loading the input", e); | |
} | |
} | |
private class SpeechBinder extends Binder { | |
SpeechService getService() { | |
return SpeechService.this; | |
} | |
} | |
private final Runnable mFetchAccessTokenRunnable = new Runnable() { | |
@Override | |
public void run() { | |
fetchAccessToken(); | |
} | |
}; | |
private class AccessTokenTask extends AsyncTask<Void, Void, AccessToken> { | |
@Override | |
protected AccessToken doInBackground(Void... voids) { | |
final SharedPreferences prefs = | |
getSharedPreferences(PREFS, Context.MODE_PRIVATE); | |
String tokenValue = prefs.getString(PREF_ACCESS_TOKEN_VALUE, null); | |
long expirationTime = prefs.getLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME, -1); | |
// Check if the current token is still valid for a while | |
if (tokenValue != null && expirationTime > 0) { | |
if (expirationTime | |
> System.currentTimeMillis() + ACCESS_TOKEN_EXPIRATION_TOLERANCE) { | |
return new AccessToken(tokenValue, new Date(expirationTime)); | |
} | |
} | |
// ***** WARNING ***** | |
// In this sample, we load the credential from a JSON file stored in a raw resource | |
// folder of this client app. You should never do this in your app. Instead, store | |
// the file in your server and obtain an access token from there. | |
// ******************* | |
final InputStream stream = getResources().openRawResource(R.raw.credential); | |
try { | |
final GoogleCredentials credentials = GoogleCredentials.fromStream(stream) | |
.createScoped(SCOPE); | |
final AccessToken token = credentials.refreshAccessToken(); | |
prefs.edit() | |
.putString(PREF_ACCESS_TOKEN_VALUE, token.getTokenValue()) | |
.putLong(PREF_ACCESS_TOKEN_EXPIRATION_TIME, | |
token.getExpirationTime().getTime()) | |
.apply(); | |
return token; | |
} catch (IOException e) { | |
Log.e(TAG, "Failed to obtain access token.", e); | |
} | |
return null; | |
} | |
@Override | |
protected void onPostExecute(AccessToken accessToken) { | |
mAccessTokenTask = null; | |
final ManagedChannel channel = new OkHttpChannelProvider() | |
.builderForAddress(HOSTNAME, PORT) | |
.nameResolverFactory(new DnsNameResolverProvider()) | |
.intercept(new GoogleCredentialsInterceptor(new GoogleCredentials(accessToken) | |
.createScoped(SCOPE))) | |
.build(); | |
mApi = SpeechGrpc.newStub(channel); | |
// Schedule access token refresh before it expires | |
if (mHandler != null) { | |
mHandler.postDelayed(mFetchAccessTokenRunnable, | |
Math.max(accessToken.getExpirationTime().getTime() | |
- System.currentTimeMillis() | |
- ACCESS_TOKEN_FETCH_MARGIN, ACCESS_TOKEN_EXPIRATION_TOLERANCE)); | |
} | |
} | |
} | |
/** | |
* Authenticates the gRPC channel using the specified {@link GoogleCredentials}. | |
*/ | |
private static class GoogleCredentialsInterceptor implements ClientInterceptor { | |
private final Credentials mCredentials; | |
private Metadata mCached; | |
private Map<String, List<String>> mLastMetadata; | |
GoogleCredentialsInterceptor(Credentials credentials) { | |
mCredentials = credentials; | |
} | |
@Override | |
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall( | |
final MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, | |
final Channel next) { | |
return new ClientInterceptors.CheckedForwardingClientCall<ReqT, RespT>( | |
next.newCall(method, callOptions)) { | |
@Override | |
protected void checkedStart(Listener<RespT> responseListener, Metadata headers) | |
throws StatusException { | |
Metadata cachedSaved; | |
URI uri = serviceUri(next, method); | |
synchronized (this) { | |
Map<String, List<String>> latestMetadata = getRequestMetadata(uri); | |
if (mLastMetadata == null || mLastMetadata != latestMetadata) { | |
mLastMetadata = latestMetadata; | |
mCached = toHeaders(mLastMetadata); | |
} | |
cachedSaved = mCached; | |
} | |
headers.merge(cachedSaved); | |
delegate().start(responseListener, headers); | |
} | |
}; | |
} | |
/** | |
* Generate a JWT-specific service URI. The URI is simply an identifier with enough | |
* information for a service to know that the JWT was intended for it. The URI will | |
* commonly be verified with a simple string equality check. | |
*/ | |
private URI serviceUri(Channel channel, MethodDescriptor<?, ?> method) | |
throws StatusException { | |
String authority = channel.authority(); | |
if (authority == null) { | |
throw Status.UNAUTHENTICATED | |
.withDescription("Channel has no authority") | |
.asException(); | |
} | |
// Always use HTTPS, by definition. | |
final String scheme = "https"; | |
final int defaultPort = 443; | |
String path = "/" + MethodDescriptor.extractFullServiceName(method.getFullMethodName()); | |
URI uri; | |
try { | |
uri = new URI(scheme, authority, path, null, null); | |
} catch (URISyntaxException e) { | |
throw Status.UNAUTHENTICATED | |
.withDescription("Unable to construct service URI for auth") | |
.withCause(e).asException(); | |
} | |
// The default port must not be present. Alternative ports should be present. | |
if (uri.getPort() == defaultPort) { | |
uri = removePort(uri); | |
} | |
return uri; | |
} | |
private URI removePort(URI uri) throws StatusException { | |
try { | |
return new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), -1 /* port */, | |
uri.getPath(), uri.getQuery(), uri.getFragment()); | |
} catch (URISyntaxException e) { | |
throw Status.UNAUTHENTICATED | |
.withDescription("Unable to construct service URI after removing port") | |
.withCause(e).asException(); | |
} | |
} | |
private Map<String, List<String>> getRequestMetadata(URI uri) throws StatusException { | |
try { | |
return mCredentials.getRequestMetadata(uri); | |
} catch (IOException e) { | |
throw Status.UNAUTHENTICATED.withCause(e).asException(); | |
} | |
} | |
private static Metadata toHeaders(Map<String, List<String>> metadata) { | |
Metadata headers = new Metadata(); | |
if (metadata != null) { | |
for (String key : metadata.keySet()) { | |
Metadata.Key<String> headerKey = Metadata.Key.of( | |
key, Metadata.ASCII_STRING_MARSHALLER); | |
for (String value : metadata.get(key)) { | |
headers.put(headerKey, value); | |
} | |
} | |
} | |
return headers; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import android.media.AudioFormat; | |
import android.media.AudioRecord; | |
import android.media.MediaRecorder; | |
import android.support.annotation.NonNull; | |
/**
 * Records microphone audio and segments it into utterances: it watches the PCM
 * amplitude, notifies {@link Callback#onVoiceStart()} when voice first exceeds a
 * threshold, streams chunks via {@link Callback#onVoice(byte[], int)}, and ends the
 * utterance via {@link Callback#onVoiceEnd()} after a silence timeout or a maximum
 * utterance length.
 */
public class VoiceRecorder {

    // Sample rates tried in order until AudioRecord accepts one.
    private static final int[] SAMPLE_RATE_CANDIDATES = new int[]{16000, 11025, 22050, 44100};

    private static final int CHANNEL = AudioFormat.CHANNEL_IN_MONO;
    private static final int ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    // Minimum approximate 16-bit sample magnitude that counts as "voice".
    private static final int AMPLITUDE_THRESHOLD = 1500;
    // Silence longer than this ends the current utterance.
    private static final int SPEECH_TIMEOUT_MILLIS = 2000;
    // Utterances are force-ended after this duration, even without silence.
    private static final int MAX_SPEECH_LENGTH_MILLIS = 30 * 1000;

    public static abstract class Callback {

        /**
         * Called when the recorder starts hearing voice.
         */
        public void onVoiceStart() {
        }

        /**
         * Called when the recorder is hearing voice.
         *
         * @param data The audio data in {@link AudioFormat#ENCODING_PCM_16BIT}.
         * @param size The size of the actual data in {@code data}.
         */
        public void onVoice(byte[] data, int size) {
        }

        /**
         * Called when the recorder stops hearing voice.
         */
        public void onVoiceEnd() {
        }
    }

    private final Callback mCallback;

    private AudioRecord mAudioRecord;

    private Thread mThread;

    private byte[] mBuffer;

    // Guards mAudioRecord/mBuffer between stop() and the ProcessVoice thread.
    private final Object mLock = new Object();

    /** The timestamp of the last time that voice is heard. */
    // Long.MAX_VALUE doubles as the "no utterance in progress" sentinel.
    private long mLastVoiceHeardMillis = Long.MAX_VALUE;

    /** The timestamp when the current voice is started. */
    private long mVoiceStartedMillis;

    public VoiceRecorder(@NonNull Callback callback) {
        mCallback = callback;
    }

    /**
     * Starts recording audio.
     *
     * <p>The caller is responsible for calling {@link #stop()} later.</p>
     *
     * @throws RuntimeException if no supported AudioRecord configuration could be
     *         created (e.g. missing RECORD_AUDIO permission).
     */
    public void start() {
        // Stop recording if it is currently ongoing.
        stop();
        // Try to create a new recording session.
        mAudioRecord = createAudioRecord();
        if (mAudioRecord == null) {
            throw new RuntimeException("Cannot instantiate VoiceRecorder");
        }
        // Start recording.
        mAudioRecord.startRecording();
        // Start processing the captured audio.
        mThread = new Thread(new ProcessVoice());
        mThread.start();
    }

    /**
     * Stops recording audio.
     */
    public void stop() {
        // Interrupting the thread while holding mLock guarantees ProcessVoice sees
        // the interrupt on its next lock acquisition, before touching the (now
        // released) AudioRecord.
        synchronized (mLock) {
            dismiss();
            if (mThread != null) {
                mThread.interrupt();
                mThread = null;
            }
            if (mAudioRecord != null) {
                mAudioRecord.stop();
                mAudioRecord.release();
                mAudioRecord = null;
            }
            mBuffer = null;
        }
    }

    /**
     * Dismisses the currently ongoing utterance.
     */
    public void dismiss() {
        if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
            mLastVoiceHeardMillis = Long.MAX_VALUE;
            mCallback.onVoiceEnd();
        }
    }

    /**
     * Retrieves the sample rate currently used to record audio.
     *
     * @return The sample rate of recorded audio, or 0 when not recording.
     */
    public int getSampleRate() {
        if (mAudioRecord != null) {
            return mAudioRecord.getSampleRate();
        }
        return 0;
    }

    /**
     * Creates a new {@link AudioRecord}.
     *
     * @return A newly created {@link AudioRecord}, or null if it cannot be created (missing
     * permissions?).
     */
    private AudioRecord createAudioRecord() {
        for (int sampleRate : SAMPLE_RATE_CANDIDATES) {
            final int sizeInBytes = AudioRecord.getMinBufferSize(sampleRate, CHANNEL, ENCODING);
            if (sizeInBytes == AudioRecord.ERROR_BAD_VALUE) {
                // This rate/channel/encoding combination is unsupported; try the next.
                continue;
            }
            final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC,
                    sampleRate, CHANNEL, ENCODING, sizeInBytes);
            if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
                mBuffer = new byte[sizeInBytes];
                return audioRecord;
            } else {
                audioRecord.release();
            }
        }
        return null;
    }

    /**
     * Continuously processes the captured audio and notifies {@link #mCallback} of corresponding
     * events.
     */
    private class ProcessVoice implements Runnable {

        @Override
        public void run() {
            while (true) {
                synchronized (mLock) {
                    // Checked under mLock so stop() (which interrupts while holding
                    // the lock) is observed before mAudioRecord is used again.
                    if (Thread.currentThread().isInterrupted()) {
                        break;
                    }
                    final int size = mAudioRecord.read(mBuffer, 0, mBuffer.length);
                    final long now = System.currentTimeMillis();
                    if (isHearingVoice(mBuffer, size)) {
                        if (mLastVoiceHeardMillis == Long.MAX_VALUE) {
                            // Transition silence -> voice: a new utterance begins.
                            mVoiceStartedMillis = now;
                            mCallback.onVoiceStart();
                        }
                        mCallback.onVoice(mBuffer, size);
                        mLastVoiceHeardMillis = now;
                        if (now - mVoiceStartedMillis > MAX_SPEECH_LENGTH_MILLIS) {
                            end();
                        }
                    } else if (mLastVoiceHeardMillis != Long.MAX_VALUE) {
                        // Still inside an utterance: keep streaming through short pauses
                        // until the silence timeout elapses.
                        mCallback.onVoice(mBuffer, size);
                        if (now - mLastVoiceHeardMillis > SPEECH_TIMEOUT_MILLIS) {
                            end();
                        }
                    }
                }
            }
        }

        // Ends the current utterance and resets the sentinel.
        private void end() {
            mLastVoiceHeardMillis = Long.MAX_VALUE;
            mCallback.onVoiceEnd();
        }

        // Returns true if any 16-bit little-endian sample in the buffer exceeds
        // AMPLITUDE_THRESHOLD (approximate magnitude: |high byte| << 8 + |low byte|).
        private boolean isHearingVoice(byte[] buffer, int size) {
            for (int i = 0; i < size - 1; i += 2) {
                // The buffer has LINEAR16 in little endian.
                int s = buffer[i + 1];
                if (s < 0) s *= -1;
                s <<= 8;
                s += Math.abs(buffer[i]);
                if (s > AMPLITUDE_THRESHOLD) {
                    return true;
                }
            }
            return false;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment