phsultan/streamingRecognize.js

## streamingRecognize.js
/**
 * Copyright 2017, Google, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

'use strict';

/**
 * Opens a streaming recognition request and pipes `readStream` into it.
 *
 * Every response is logged. When a response carries error code 11 (which,
 * according to the original note, is what the service returns once more than
 * about 65 seconds of audio has been sent on one stream), the reader is
 * detached from the current stream and a fresh recognize stream is opened so
 * the remaining audio keeps being sent.
 *
 * @param {object} client - Speech client exposing `streamingRecognize(request)`.
 * @param {object} request - Streaming request handed to the client unchanged.
 * @param {object} readStream - Audio source; `pipe`, `unpipe` and `pause` are used.
 * @param {*} recognizeStream - Ignored; kept so existing callers keep working.
 *   Arguments are passed by value, so assigning to it was never visible to
 *   the caller. Use the return value instead.
 * @returns {object} The recognize stream that `readStream` is now piped into.
 */
const start = function(client, request, readStream, recognizeStream) {
  console.log('[INFO] create recognizeStream');

  // Error code reported in a response once too much audio was sent on one stream.
  const TOO_MUCH_AUDIO = 11;

  const activeStream = client
    .streamingRecognize(request)
    .on('error', error => {
      console.log(`Got error ${error}`);
    })
    .on('unpipe', () => {
      // Log only. pipe() also detaches the destination by itself (for
      // instance when the destination errors), which emits 'unpipe' too, so
      // this event must not be what triggers a replacement stream.
      console.log('[INFO] input stream unpiped output stream');
    })
    .on('end', () => {
      console.log('[INFO] recognizeStream ended');
    })
    .on('data', data => {
      if (data.error && data.error.code) {
        console.log(`[ERROR] : ${JSON.stringify(data.error)}`);

        if (data.error.code === TOO_MUCH_AUDIO) {
          console.log('[ERROR] Need to refresh recognizeStream because we sent to much data');
          // Detach from the exhausted stream, attach to a new one, then pause
          // (same order of operations as before: unpipe, re-pipe, pause).
          readStream.unpipe();
          start(client, request, readStream, null);
          readStream.pause();
        }
        return;
      }

      // A response may carry no results/alternatives; skip it instead of throwing.
      const firstResult = data.results && data.results[0];
      const bestGuess =
        firstResult && firstResult.alternatives && firstResult.alternatives[0];
      if (bestGuess) {
        console.log(`Transcription: ${bestGuess.transcript}`);
      }
    });

  readStream.pipe(activeStream);

  return activeStream;
};

/**
 * Reads a local audio file in small timed slices and streams it to the
 * Speech API through `start()`, which logs the transcriptions.
 *
 * @param {string} filename - Local path to the audio file, e.g. /path/to/audio.raw.
 * @param {string} encoding - Encoding of the audio file, e.g. LINEAR16.
 * @param {number} sampleRateHertz - Sample rate of the audio, e.g. 16000.
 * @param {string} languageCode - BCP-47 language code, e.g. en-US.
 */
function streamingRecognize(filename, encoding, sampleRateHertz, languageCode) {
  // [START speech_streaming_recognize]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const request = {
    config: {
      encoding: encoding,
      sampleRateHertz: sampleRateHertz,
      languageCode: languageCode,
    },
    interimResults: true, // If you want interim results, set this to true
  };

  // Amount of audio, in milliseconds, delivered per 'data' event. The same
  // value is the delay before the next read, so both stay in sync.
  const CHUNK_MS = 200;
  // NOTE(review): assumes 2 bytes per sample (16-bit mono) — confirm for
  // encodings other than LINEAR16.
  const BYTES_PER_SAMPLE = 2;

  let time = 0;

  /**
   * Maximum bytes to get in the 'data' listener: CHUNK_MS worth of audio,
   * rounded because highWaterMark has to be a whole number of bytes.
   */
  const readStream = fs.createReadStream(filename, {
    highWaterMark: Math.round((sampleRateHertz * BYTES_PER_SAMPLE * CHUNK_MS) / 1000),
  });

  readStream
    .on('data', () => {
      // Hold the next slice back for CHUNK_MS before reading on.
      readStream.pause();
      setTimeout(() => {
        time += CHUNK_MS;

        if (time % 1000 === 0) {
          console.log(`[INFO] elapsed time : ${time / 1000} sec`);
        }
        readStream.resume();
      }, CHUNK_MS);
    })
    .on('end', () => {
      // No explicit end() here: start() pipes with default options, so pipe()
      // ends the recognize stream when the file is exhausted. The previous
      // call went through a variable that was always null and threw.
      console.log('[INFO] No more data, ending recognizeStream');
    })
    .on('close', () => {
      console.log('Stream closed');
    })
    .on('error', console.error);

  start(client, request, readStream, null);
}

// Command-line entry point. Exposes a single `stream <filename>` command and
// three global options (encoding, sampleRateHertz, languageCode) whose values
// are forwarded to streamingRecognize(). Reading `.argv` at the end of the
// chain is what makes yargs parse the arguments.
require('yargs')
  .demand(2)
  .command(
    'stream <filename>',
    'Detects speech in a local audio file by streaming it to the Speech API.',
    {},
    ({filename, encoding, sampleRateHertz, languageCode}) =>
      streamingRecognize(filename, encoding, sampleRateHertz, languageCode)
  )
  .options({
    // Audio encoding of the input file.
    encoding: {
      type: 'string',
      alias: 'e',
      default: 'LINEAR16',
      requiresArg: true,
      global: true,
    },
    // Sample rate of the input file.
    sampleRateHertz: {
      type: 'number',
      alias: 'r',
      default: 16000,
      requiresArg: true,
      global: true,
    },
    // Language spoken in the input file.
    languageCode: {
      type: 'string',
      alias: 'l',
      default: 'en-US',
      requiresArg: true,
      global: true,
    },
  })
  .example('node $0 stream ./resources/audio.raw  -e LINEAR16 -r 16000')
  .wrap(120)
  .recommendCommands()
  .epilogue('For more information, see https://cloud.google.com/speech/docs')
  .help()
  .strict().argv;
	/**
	* Copyright 2017, Google, Inc.
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	'use strict';

	/**
	 * Opens a streaming recognition request and pipes `readStream` into it.
	 *
	 * Every response is logged. When a response carries error code 11 (which,
	 * according to the original note, is what the service returns once more than
	 * about 65 seconds of audio has been sent on one stream), the reader is
	 * detached from the current stream and a fresh recognize stream is opened so
	 * the remaining audio keeps being sent.
	 *
	 * @param {object} client - Speech client exposing `streamingRecognize(request)`.
	 * @param {object} request - Streaming request handed to the client unchanged.
	 * @param {object} readStream - Audio source; `pipe`, `unpipe` and `pause` are used.
	 * @param {*} recognizeStream - Ignored; kept so existing callers keep working.
	 *   Arguments are passed by value, so assigning to it was never visible to
	 *   the caller. Use the return value instead.
	 * @returns {object} The recognize stream that `readStream` is now piped into.
	 */
	const start = function(client, request, readStream, recognizeStream) {
	  console.log('[INFO] create recognizeStream');

	  // Error code reported in a response once too much audio was sent on one stream.
	  const TOO_MUCH_AUDIO = 11;

	  const activeStream = client
	    .streamingRecognize(request)
	    .on('error', error => {
	      console.log(`Got error ${error}`);
	    })
	    .on('unpipe', () => {
	      // Log only. pipe() also detaches the destination by itself (for
	      // instance when the destination errors), which emits 'unpipe' too, so
	      // this event must not be what triggers a replacement stream.
	      console.log('[INFO] input stream unpiped output stream');
	    })
	    .on('end', () => {
	      console.log('[INFO] recognizeStream ended');
	    })
	    .on('data', data => {
	      if (data.error && data.error.code) {
	        console.log(`[ERROR] : ${JSON.stringify(data.error)}`);

	        if (data.error.code === TOO_MUCH_AUDIO) {
	          console.log('[ERROR] Need to refresh recognizeStream because we sent to much data');
	          // Detach from the exhausted stream, attach to a new one, then pause
	          // (same order of operations as before: unpipe, re-pipe, pause).
	          readStream.unpipe();
	          start(client, request, readStream, null);
	          readStream.pause();
	        }
	        return;
	      }

	      // A response may carry no results/alternatives; skip it instead of throwing.
	      const firstResult = data.results && data.results[0];
	      const bestGuess =
	        firstResult && firstResult.alternatives && firstResult.alternatives[0];
	      if (bestGuess) {
	        console.log(`Transcription: ${bestGuess.transcript}`);
	      }
	    });

	  readStream.pipe(activeStream);

	  return activeStream;
	};

	/**
	 * Reads a local audio file in small timed slices and streams it to the
	 * Speech API through `start()`, which logs the transcriptions.
	 *
	 * @param {string} filename - Local path to the audio file, e.g. /path/to/audio.raw.
	 * @param {string} encoding - Encoding of the audio file, e.g. LINEAR16.
	 * @param {number} sampleRateHertz - Sample rate of the audio, e.g. 16000.
	 * @param {string} languageCode - BCP-47 language code, e.g. en-US.
	 */
	function streamingRecognize(filename, encoding, sampleRateHertz, languageCode) {
	  // [START speech_streaming_recognize]
	  const fs = require('fs');

	  // Imports the Google Cloud client library
	  const speech = require('@google-cloud/speech');

	  // Creates a client
	  const client = new speech.SpeechClient();

	  /**
	   * TODO(developer): Uncomment the following lines before running the sample.
	   */
	  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
	  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
	  // const sampleRateHertz = 16000;
	  // const languageCode = 'BCP-47 language code, e.g. en-US';

	  const request = {
	    config: {
	      encoding: encoding,
	      sampleRateHertz: sampleRateHertz,
	      languageCode: languageCode,
	    },
	    interimResults: true, // If you want interim results, set this to true
	  };

	  // Amount of audio, in milliseconds, delivered per 'data' event. The same
	  // value is the delay before the next read, so both stay in sync.
	  const CHUNK_MS = 200;
	  // NOTE(review): assumes 2 bytes per sample (16-bit mono) — confirm for
	  // encodings other than LINEAR16.
	  const BYTES_PER_SAMPLE = 2;

	  let time = 0;

	  /**
	   * Maximum bytes to get in the 'data' listener: CHUNK_MS worth of audio,
	   * rounded because highWaterMark has to be a whole number of bytes.
	   * (The previous expression here had lost its multiplication signs and
	   * did not parse.)
	   */
	  const readStream = fs.createReadStream(filename, {
	    highWaterMark: Math.round((sampleRateHertz * BYTES_PER_SAMPLE * CHUNK_MS) / 1000),
	  });

	  readStream
	    .on('data', () => {
	      // Hold the next slice back for CHUNK_MS before reading on.
	      readStream.pause();
	      setTimeout(() => {
	        time += CHUNK_MS;

	        if (time % 1000 === 0) {
	          console.log(`[INFO] elapsed time : ${time / 1000} sec`);
	        }
	        readStream.resume();
	      }, CHUNK_MS);
	    })
	    .on('end', () => {
	      // No explicit end() here: start() pipes with default options, so pipe()
	      // ends the recognize stream when the file is exhausted. The previous
	      // call went through a variable that was always null and threw.
	      console.log('[INFO] No more data, ending recognizeStream');
	    })
	    .on('close', () => {
	      console.log('Stream closed');
	    })
	    .on('error', console.error);

	  start(client, request, readStream, null);
	}

	// Command-line entry point. Exposes a single `stream <filename>` command and
	// three global options (encoding, sampleRateHertz, languageCode) whose values
	// are forwarded to streamingRecognize(). Reading `.argv` at the end of the
	// chain is what makes yargs parse the arguments.
	require('yargs')
	  .demand(2)
	  .command(
	    'stream <filename>',
	    'Detects speech in a local audio file by streaming it to the Speech API.',
	    {},
	    ({filename, encoding, sampleRateHertz, languageCode}) =>
	      streamingRecognize(filename, encoding, sampleRateHertz, languageCode)
	  )
	  .options({
	    // Audio encoding of the input file.
	    encoding: {
	      type: 'string',
	      alias: 'e',
	      default: 'LINEAR16',
	      requiresArg: true,
	      global: true,
	    },
	    // Sample rate of the input file.
	    sampleRateHertz: {
	      type: 'number',
	      alias: 'r',
	      default: 16000,
	      requiresArg: true,
	      global: true,
	    },
	    // Language spoken in the input file.
	    languageCode: {
	      type: 'string',
	      alias: 'l',
	      default: 'en-US',
	      requiresArg: true,
	      global: true,
	    },
	  })
	  .example('node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000')
	  .wrap(120)
	  .recommendCommands()
	  .epilogue('For more information, see https://cloud.google.com/speech/docs')
	  .help()
	  .strict().argv;