pietrop/spoken_data_node_sdk.js

## spoken_data_node_sdk.js
/**
* Minimal SDK to interact with Spoken Data's speaker diarization service.
*
* This SDK works in two parts. First you send the video/audio file and receive a `uid`; you then use that `uid` to check the status of the transcription and retrieve the JSON.
*
* Input: the API takes in a video or audio file (2048MB per file limit, file extensions: wav, mp3, mp4, ac3, avi, mpg, wmv, flv, mkv)
* output: a uid
*
* input: uid
* output: status of transcription is the first param returned. if `done` returns transcript/speaker diarization as second param.
*
* to learn more about spoken data's pricing visit https://spokendata.com/pricing
* Speaker diarization is a free service. Transcription is subject to free trial/available credit on the system.
* Transcription is not word accurate. However, it does contain punctuation.
* If there is no credit on Spoken Data, or the free trial has run out, the text fields will be empty but speaker diarization will still be available in the response.
*
* This is a reduced SDK from original one made for quickQuoteNode usecase  https://github.com/pietrop/quickQuoteNode/blob/master/lib/interactive_video_components/processing/speech_to_text_api/src/spoken_data_sdk.js
*
* also see https://spokendata.com/api-for-developers for more on the API.
*
*
* Example usage
```javascript
//Require spoken data

var SpokenData = require('./index.js').SpokenData;

//Initialise spoken data with API keys and user
var spokenData = new SpokenData({
  key:  "",
  userid: ""
})
```

```javascript
// Usage, post a video

var testVideo ="";

var uid;

// send video to transcribe
spokenData.addNewRecording(testVideo, function(res){
  //save uid to retrieve transcript/speaker diarization at later stage when ready
  uid = res;
  console.log(res);

  if(uid){
    console.log("defined uid " +uid)
  }else{
    console.log("undefined uid " +uid)
  }

});
```

```javascript
//Usage, retrieve a video transcript/speaker diarization with a UID

var uidExample = "11479"

spokenData.checkRecordingStatus(uidExample,function(status, json){
  //status can be `done`,`failed`, `processing`.
  if(status == "done"){
    console.log("done")
    if(json){
      console.log(JSON.stringify(json))
    }
  }else if(status =="failed"){
    console.log("Failed");
  }else if(status == "processing"){
    console.log("Processing");
  }
});
```
*/

var request = require('request');
var fs = require('fs');
var parser = require('xml2json');
// TODO: replace xml2js with xml2json through the code
var parseString = require('xml2js').parseString;


// var SpokenData = function(config) {

/**
* Client for the Spoken Data REST API.
*
* Stores the credentials that every request URL is built from. Can be called
* with or without `new`.
*
* @param {Object} config
* @param {string} config.key - API key, stored as `this.key`.
* @param {string} config.userid - user id, stored as `this.userid`.
* @throws {TypeError} if `config` is `null` or `undefined`.
*/
function SpokenData(config){
  // Without `new`, `this` is the global object in sloppy mode (the credentials
  // would leak onto it) or `undefined` in strict mode, so re-enter as a
  // proper constructor call.
  if (!(this instanceof SpokenData)) {
    return new SpokenData(config);
  }
  if (config === null || config === undefined) {
    throw new TypeError("SpokenData: a config object with `key` and `userid` is required");
  }
  this.key     = config.key;
  this.userid  = config.userid;
  this.baseurl = "https://spokendata.com/api/";
}

/**
* Uploads a video/audio file to Spoken Data so it gets transcribed.
*
* @param {string} fileName - path of the file to upload; it is also sent,
*   URL-encoded, as the `filename` query parameter.
* @param {function(string=, Error=)} callback - receives the recording id (uid)
*   needed to retrieve the result once transcription is done. When the response
*   does not contain a recording id, the uid is `undefined` and an Error is
*   passed as second argument.
*/
SpokenData.prototype.addNewRecording = function (fileName, callback){
  // Encode the file name so spaces, `&`, `#` etc. cannot corrupt the query string.
  var putUrl = this.baseurl + this.userid + "/" + this.key +
    "/recording/put?filename=" + encodeURIComponent(fileName) + "&language=english";

  postRequest(putUrl, fileName, function (result){
    // Walk the parsed response defensively: an error answer from the API (or an
    // unparsable body) must not throw from inside this asynchronous callback.
    var recording = result && result.data && result.data.recording && result.data.recording[0];
    var uid = recording && recording.$ ? recording.$.id : undefined;

    if (typeof callback !== "function") {
      return;
    }
    if (uid === undefined) {
      return callback(undefined, new Error("Spoken Data response did not contain a recording id"));
    }
    callback(uid);
  });//postRequest
};


/**
* Checks whether a recording has been processed and, if it has, fetches its
* transcript / speaker diarization.
*
* @param {string} uid - recording id, as handed to the `addNewRecording` callback.
* @param {function(string, Object=, Error=)} cb - called exactly once with:
*   - `"processing"`, `"failed"` or any other status the API reports, and no
*     further arguments;
*   - `"done"` and the transcript converted from XML to a plain object;
*   - `"error"`, `undefined` and an Error when a request failed, answered with a
*     non-200 status, or returned something that could not be parsed.
*/
SpokenData.prototype.checkRecordingStatus = function(uid, cb){

  var url = this.baseurl + this.userid + "/" + this.key + "/recording/" + uid;

  // Folds a transport error or a non-200 answer into one Error (null when fine).
  function httpFailure(error, response, what){
    if (error) {
      return error;
    }
    if (!response || response.statusCode !== 200) {
      return new Error(what + " request failed with HTTP status " +
        (response ? response.statusCode : "unknown"));
    }
    return null;
  }

  // Single exit for every failure so the caller is never left waiting.
  function fail(err){
    return cb("error", undefined, err);
  }

  request(url, function (error, response, body) {
    var statusFailure = httpFailure(error, response, "status");
    if (statusFailure) {
      return fail(statusFailure);
    }

    parseString(body, function (parseErr, result) {
      var recording = result && result.data && result.data.recording && result.data.recording[0];
      if (parseErr || !recording || !recording.status) {
        return fail(parseErr || new Error("unexpected status response from Spoken Data"));
      }

      var status = recording.status[0];
      if (status !== "done") {
        // "failed", "processing" and any status this SDK does not know about
        return cb(status);
      }

      var xmlUrl = recording.annotation_xml_url && recording.annotation_xml_url[0];
      if (!xmlUrl) {
        return fail(new Error("recording is done but no annotation_xml_url was returned"));
      }

      // get request, retrieves the XML transcript from the URL
      request(xmlUrl, function (xmlError, xmlResponse, xmlBody) {
        var xmlFailure = httpFailure(xmlError, xmlResponse, "transcript");
        if (xmlFailure) {
          return fail(xmlFailure);
        }

        var speakerDiarizationJson;
        try {
          // xml2json produces a JSON string; hand callers a parsed object
          speakerDiarizationJson = JSON.parse(parser.toJson(xmlBody));
        } catch (conversionErr) {
          return fail(conversionErr);
        }
        // outside the try block so an exception thrown by `cb` is not swallowed
        return cb(status, speakerDiarizationJson);
      });
    });//parseString
  });
};


/**
* Placeholder - not implemented.
*
* The body is empty: both arguments are ignored and `cb` is never called.
* NOTE(review): presumably meant to download the transcript XML from `url`
* (checkRecordingStatus currently does that inline) - confirm the intended
* contract before relying on this method.
*
* @param url - currently unused.
* @param cb - currently unused; never invoked.
*/
SpokenData.prototype.getTranscriptionXml = function(url, cb){

}


/**
* Helper: streams a local file to `postUrl` with an HTTP PUT (despite the
* function's name) and parses the XML answer with xml2js.
*
* A failed upload (request error, or the file turning out to be unreadable) is
* only logged with `console.error`; `callback` is NOT invoked in that case.
*
* @param {string} postUrl - fully built upload URL.
* @param {string} fileName - path of the file to upload; its size is read
*   synchronously and sent as the `content-length` header.
* @param {function(Object=, Error=)} [callback] - receives the parsed response
*   as first argument. If the response body cannot be parsed, the first argument
*   is whatever the parser produced and the parse error is passed as second
*   argument.
* @throws whatever `fs.statSync` throws when `fileName` cannot be stat'ed.
*/
function postRequest(postUrl, fileName, callback){
  var fpath = fileName;
  var fsize = fs.statSync(fpath).size;

  //request.put
  var req = request.put(postUrl, {headers: { 'content-length': fsize }}, function(err, httpResponse, body) {
    if (err) {
      return console.error('upload failed:', err);
    }
    parseString(body, function (parseErr, result) {
      if (parseErr) {
        // used to be dropped silently, leaving callers with an unexplained `undefined`
        console.error('could not parse upload response:', parseErr);
      }
      if (callback) {
        callback(result, parseErr);
      }
    });
  });

  var upload = fs.createReadStream(fpath);
  // Without a listener, a read error (e.g. the file is not readable) is thrown
  // as an uncaught 'error' event and takes the whole process down.
  upload.on('error', function (readErr) {
    console.error('upload failed:', readErr);
    req.abort();
  });
  upload.pipe(req);
}//postRequest


exports.SpokenData = SpokenData;
	/**
	* Minimal SDK to interact with Spoken Data's speaker diarization service.
	*
	* This SDK works in two parts. First you send the video/audio file and receive a `uid`; you then use that `uid` to check the status of the transcription and retrieve the JSON.
	*
	* Input: the API takes in a video or audio file (2048MB per file limit, file extensions: wav, mp3, mp4, ac3, avi, mpg, wmv, flv, mkv)
	* output: a uid
	*
	* input: uid
	* output: status of transcription is the first param returned. if `done` returns transcript/speaker diarization as second param.
	*
	* to learn more about spoken data's pricing visit https://spokendata.com/pricing
	* Speaker diarization is a free service. Transcription is subject to free trial/available credit on the system.
	* Transcription is not word accurate. However, it does contain punctuation.
	* If there is no credit on Spoken Data, or the free trial has run out, the text fields will be empty but speaker diarization will still be available in the response.
	*
	* This is a reduced SDK from original one made for quickQuoteNode usecase https://github.com/pietrop/quickQuoteNode/blob/master/lib/interactive_video_components/processing/speech_to_text_api/src/spoken_data_sdk.js
	*
	* also see https://spokendata.com/api-for-developers for more on the API.
	*
	*
	* Example usage
	```javascript
	//Require spoken data

	var SpokenData = require('./index.js').SpokenData;

	//Initialise spoken data with API keys and user
	var spokenData = new SpokenData({
	key: "",
	userid: ""
	})
	```

	```javascript
	// Usage, post a video

	var testVideo ="";

	var uid;

	// send video to transcribe
	spokenData.addNewRecording(testVideo, function(res){
	//save uid to retrieve transcript/speaker diarization at later stage when ready
	uid = res;
	console.log(res);

	if(uid){
	console.log("defined uid " +uid)
	}else{
	console.log("undefined uid " +uid)
	}

	});
	```

	```javascript
	//Usage, retrieve a video transcript/speaker diarization with a UID

	var uidExample = "11479"

	spokenData.checkRecordingStatus(uidExample,function(status, json){
	//status can be `done`,`failed`, `processing`.
	if(status == "done"){
	console.log("done")
	if(json){
	console.log(JSON.stringify(json))
	}
	}else if(status =="failed"){
	console.log("Failed");
	}else if(status == "processing"){
	console.log("Processing");
	}
	});
	```
	*/

	var request = require('request');
	var fs = require('fs');
	var parser = require('xml2json');
	// TODO: replace xml2js with xml2json through the code
	var parseString = require('xml2js').parseString;


	// var SpokenData = function(config) {

	/**
	* Client for the Spoken Data REST API.
	*
	* Stores the credentials that every request URL is built from. Can be called
	* with or without `new`.
	*
	* @param {Object} config
	* @param {string} config.key - API key, stored as `this.key`.
	* @param {string} config.userid - user id, stored as `this.userid`.
	* @throws {TypeError} if `config` is `null` or `undefined`.
	*/
	function SpokenData(config){
		// Without `new`, `this` is the global object in sloppy mode (the credentials
		// would leak onto it) or `undefined` in strict mode, so re-enter as a
		// proper constructor call.
		if (!(this instanceof SpokenData)) {
			return new SpokenData(config);
		}
		if (config === null || config === undefined) {
			throw new TypeError("SpokenData: a config object with `key` and `userid` is required");
		}
		this.key = config.key;
		this.userid = config.userid;
		this.baseurl = "https://spokendata.com/api/";
	}

	/**
	* Uploads a video/audio file to Spoken Data so it gets transcribed.
	*
	* @param {string} fileName - path of the file to upload; it is also sent,
	*   URL-encoded, as the `filename` query parameter.
	* @param {function(string=, Error=)} callback - receives the recording id (uid)
	*   needed to retrieve the result once transcription is done. When the response
	*   does not contain a recording id, the uid is `undefined` and an Error is
	*   passed as second argument.
	*/
	SpokenData.prototype.addNewRecording = function (fileName, callback){
		// Encode the file name so spaces, `&`, `#` etc. cannot corrupt the query string.
		var putUrl = this.baseurl + this.userid + "/" + this.key +
			"/recording/put?filename=" + encodeURIComponent(fileName) + "&language=english";

		postRequest(putUrl, fileName, function (result){
			// Walk the parsed response defensively: an error answer from the API (or an
			// unparsable body) must not throw from inside this asynchronous callback.
			var recording = result && result.data && result.data.recording && result.data.recording[0];
			var uid = recording && recording.$ ? recording.$.id : undefined;

			if (typeof callback !== "function") {
				return;
			}
			if (uid === undefined) {
				return callback(undefined, new Error("Spoken Data response did not contain a recording id"));
			}
			callback(uid);
		});//postRequest
	};



	/**
	* Checks whether a recording has been processed and, if it has, fetches its
	* transcript / speaker diarization.
	*
	* @param {string} uid - recording id, as handed to the `addNewRecording` callback.
	* @param {function(string, Object=, Error=)} cb - called exactly once with:
	*   - `"processing"`, `"failed"` or any other status the API reports, and no
	*     further arguments;
	*   - `"done"` and the transcript converted from XML to a plain object;
	*   - `"error"`, `undefined` and an Error when a request failed, answered with a
	*     non-200 status, or returned something that could not be parsed.
	*/
	SpokenData.prototype.checkRecordingStatus = function(uid, cb){

		var url = this.baseurl + this.userid + "/" + this.key + "/recording/" + uid;

		// Folds a transport error or a non-200 answer into one Error (null when fine).
		function httpFailure(error, response, what){
			if (error) {
				return error;
			}
			if (!response || response.statusCode !== 200) {
				return new Error(what + " request failed with HTTP status " +
					(response ? response.statusCode : "unknown"));
			}
			return null;
		}

		// Single exit for every failure so the caller is never left waiting.
		function fail(err){
			return cb("error", undefined, err);
		}

		request(url, function (error, response, body) {
			var statusFailure = httpFailure(error, response, "status");
			if (statusFailure) {
				return fail(statusFailure);
			}

			parseString(body, function (parseErr, result) {
				var recording = result && result.data && result.data.recording && result.data.recording[0];
				if (parseErr || !recording || !recording.status) {
					return fail(parseErr || new Error("unexpected status response from Spoken Data"));
				}

				var status = recording.status[0];
				if (status !== "done") {
					// "failed", "processing" and any status this SDK does not know about
					return cb(status);
				}

				var xmlUrl = recording.annotation_xml_url && recording.annotation_xml_url[0];
				if (!xmlUrl) {
					return fail(new Error("recording is done but no annotation_xml_url was returned"));
				}

				// get request, retrieves the XML transcript from the URL
				request(xmlUrl, function (xmlError, xmlResponse, xmlBody) {
					var xmlFailure = httpFailure(xmlError, xmlResponse, "transcript");
					if (xmlFailure) {
						return fail(xmlFailure);
					}

					var speakerDiarizationJson;
					try {
						// xml2json produces a JSON string; hand callers a parsed object
						speakerDiarizationJson = JSON.parse(parser.toJson(xmlBody));
					} catch (conversionErr) {
						return fail(conversionErr);
					}
					// outside the try block so an exception thrown by `cb` is not swallowed
					return cb(status, speakerDiarizationJson);
				});
			});//parseString
		});
	};


	/**
	* Placeholder - not implemented.
	*
	* The body is empty: both arguments are ignored and `cb` is never called.
	* NOTE(review): presumably meant to download the transcript XML from `url`
	* (checkRecordingStatus currently does that inline) - confirm the intended
	* contract before relying on this method.
	*
	* @param url - currently unused.
	* @param cb - currently unused; never invoked.
	*/
	SpokenData.prototype.getTranscriptionXml = function(url, cb){

	}


	/**
	* Helper: streams a local file to `postUrl` with an HTTP PUT (despite the
	* function's name) and parses the XML answer with xml2js.
	*
	* A failed upload (request error, or the file turning out to be unreadable) is
	* only logged with `console.error`; `callback` is NOT invoked in that case.
	*
	* @param {string} postUrl - fully built upload URL.
	* @param {string} fileName - path of the file to upload; its size is read
	*   synchronously and sent as the `content-length` header.
	* @param {function(Object=, Error=)} [callback] - receives the parsed response
	*   as first argument. If the response body cannot be parsed, the first argument
	*   is whatever the parser produced and the parse error is passed as second
	*   argument.
	* @throws whatever `fs.statSync` throws when `fileName` cannot be stat'ed.
	*/
	function postRequest(postUrl, fileName, callback){
		var fpath = fileName;
		var fsize = fs.statSync(fpath).size;

		//request.put
		var req = request.put(postUrl, {headers: { 'content-length': fsize }}, function(err, httpResponse, body) {
			if (err) {
				return console.error('upload failed:', err);
			}
			parseString(body, function (parseErr, result) {
				if (parseErr) {
					// used to be dropped silently, leaving callers with an unexplained `undefined`
					console.error('could not parse upload response:', parseErr);
				}
				if (callback) {
					callback(result, parseErr);
				}
			});
		});

		var upload = fs.createReadStream(fpath);
		// Without a listener, a read error (e.g. the file is not readable) is thrown
		// as an uncaught 'error' event and takes the whole process down.
		upload.on('error', function (readErr) {
			console.error('upload failed:', readErr);
			req.abort();
		});
		upload.pipe(req);
	}//postRequest


	exports.SpokenData = SpokenData;