Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Transcribe video/audio using IBM Watson
var request = require('request');
var fs = require('fs');
var sox = require('sox');
var spawn = require('child_process').spawn;
// Credentials sent as the HTTP auth user/password on every Watson request.
// Both are blank here — presumably to be filled in before running.
var WATSON_USER = '';
var WATSON_PASS = '';
// Endpoint that each audio chunk is POSTed to for recognition.
var url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize';
/**
 * Convert a media file to a mono, 16 kHz, signed 16-bit PCM WAV using ffmpeg.
 *
 * The result is written next to the input as `<file>.temp.wav`. If that file
 * already exists it is reused and ffmpeg is not run.
 *
 * @param {string} file - Path of the source audio/video file.
 * @param {function(string)} cb - Called with the WAV path once it is available.
 *   Not called when ffmpeg cannot be started or exits with a failure; the
 *   problem is logged instead.
 */
function convertToWav(file, cb) {
  var wavFile = file + '.temp.wav';

  // fs.exists is deprecated; fs.access reports a missing file through `err`.
  fs.access(wavFile, function(accessErr) {
    if (!accessErr) {
      cb(wavFile);
      return;
    }

    var args = ['-i', file, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', wavFile];
    // Nothing reads the child's output here, so discard it instead of leaving
    // unread pipes that can fill up and stall ffmpeg on long inputs.
    var convert = spawn('ffmpeg', args, { stdio: 'ignore' });
    // 'error' may be followed by 'close'; make sure only one of them acts.
    var finished = false;

    convert.on('error', function(spawnErr) {
      // Emitted e.g. when ffmpeg is not installed. Without a listener this
      // event would be thrown and take the whole process down.
      if (finished) {
        return;
      }
      finished = true;
      console.log(spawnErr);
    });

    convert.on('close', function(code) {
      if (finished) {
        return;
      }
      finished = true;
      if (code !== 0) {
        // Do not hand a missing or truncated WAV to the rest of the pipeline.
        console.log(new Error('ffmpeg exited with code ' + code));
        return;
      }
      cb(wavFile);
    });
  });
}
/**
 * Split a WAV file into chunks of at most five minutes using the sox CLI.
 *
 * The duration is read with `sox.identify`. Files no longer than five minutes
 * are passed through untouched; longer files are cut into `<file>.<start>.wav`
 * pieces, where `<start>` is the chunk's start time in seconds.
 *
 * @param {string} file - Path of the WAV file to split.
 * @param {function(Array<{name: string, offset: number}>)} cb - Called once
 *   with the chunk descriptors, ordered by offset. Chunks whose sox process
 *   failed are logged and left out. Not called when the duration cannot be
 *   determined; that error is logged instead.
 */
function split(file, cb) {
  // split into 5 minute chunks
  var maxLength = 60 * 5;
  var pending = 0;
  var files = [];

  // Records the outcome of one chunk and reports back after the last one.
  function finishedSplit(filename, start, succeeded) {
    if (succeeded) {
      files.push({
        name: filename,
        offset: start
      });
    }
    pending--;
    if (pending === 0) {
      // Children finish in arbitrary order; hand the chunks back in timeline order.
      files.sort(function(a, b) {
        return a.offset - b.offset;
      });
      cb(files);
    }
  }

  // Starts one sox process that extracts the chunk beginning at `start`.
  function startChunk(start) {
    var filePart = file + '.' + start + '.wav';
    var args = [file, filePart, 'trim', '' + start, '' + maxLength];
    // 'error' may be followed by 'close'; count each chunk exactly once.
    var settled = false;

    function settle(succeeded) {
      if (settled) {
        return;
      }
      settled = true;
      finishedSplit(filePart, start, succeeded);
    }

    // The child's output is never read, so discard it rather than pipe it.
    var child = spawn('sox', args, { stdio: 'ignore' });
    child.on('error', function(spawnErr) {
      // Emitted e.g. when sox is not installed; unhandled it would crash.
      console.log(spawnErr);
      settle(false);
    });
    child.on('close', function(code) {
      if (!settled && code !== 0) {
        console.log(new Error('sox exited with code ' + code + ' for ' + filePart));
      }
      settle(code === 0);
    });
  }

  sox.identify(file, function(err, results) {
    if (err) {
      console.log(err);
      return;
    }
    var duration = results.duration;
    if (duration > maxLength) {
      for (var start = 0; start < duration; start += maxLength) {
        pending++;
        startChunk(start);
      }
    } else {
      // Short enough to send as-is: the single "chunk" is the file itself.
      cb([{
        name: file,
        offset: 0
      }]);
    }
  });
}
/**
 * Upload one WAV file to the Watson recognize endpoint and return the decoded
 * JSON response.
 *
 * The file is read fully into memory and POSTed as `audio/wav` with word
 * timestamps requested, authenticating with WATSON_USER / WATSON_PASS.
 *
 * @param {string} file - Path of the WAV file to send.
 * @param {function(Object)} [callback] - Called with the parsed response body.
 *   Not called when reading the file, performing the request, or decoding the
 *   response fails; the error is logged instead.
 */
function send_to_watson(file, callback) {
  fs.readFile(file, function(error, data) {
    if (error) {
      console.log(error);
      return false;
    }
    var options = {
      headers: {
        'Content-Type': 'audio/wav',
        'Transfer-Encoding': 'chunked',
      },
      qs: {
        timestamps: 'true',
        continuous: 'true',
        inactivity_timeout: '-1'
      },
      auth: {
        user: WATSON_USER,
        pass: WATSON_PASS
      },
      body: data
    };
    request.post(url, options, function(error, response, body) {
      if (error) {
        // There is no body to decode on a transport failure, so stop here
        // instead of falling through to JSON.parse(undefined).
        console.log(error);
        return;
      }
      if (typeof callback !== 'function') {
        return;
      }
      var result;
      try {
        result = JSON.parse(body);
      } catch (parseError) {
        // A non-JSON reply (e.g. an HTML error page) must not crash the process.
        console.log(parseError);
        return;
      }
      callback(result);
    });
  });
}
/**
 * Reduce a Watson recognition response to a list of sentences with per-word
 * timings.
 *
 * Only the first alternative of every result is used. Each word's start and
 * end time is shifted by `offset`.
 *
 * @param {Object} data - Response object with a `results` array; each result
 *   carries an `alternatives` array whose entries have `transcript`,
 *   `confidence` and `timestamps` ([word, start, end] triples).
 * @param {number} [offset] - Amount added to every timestamp; 0 when omitted.
 * @returns {{sentences: Array<{transcript: *, confidence: *, words: Array}>}}
 */
function parse(data, offset) {
  var shift = offset === undefined ? 0 : offset;

  // Turns one [word, start, end] triple into a shifted word record.
  function toWord(stamp) {
    return {
      word: stamp[0],
      start: stamp[1] + shift,
      end: stamp[2] + shift
    };
  }

  // Builds the sentence record from a result's first alternative.
  function toSentence(result) {
    var best = result.alternatives[0];
    return {
      transcript: best.transcript,
      confidence: best.confidence,
      words: best.timestamps.map(toWord)
    };
  }

  return {
    sentences: data.results.map(toSentence)
  };
}
/**
 * Write the combined transcript to `<file>.transcription.json` and remove the
 * temporary chunk files.
 *
 * Entries are ordered by chunk offset and their sentences concatenated into a
 * single JSON array. Chunk files are deleted only after the transcript has
 * been written successfully, and only when there was more than one chunk.
 *
 * @param {string} file - Base path; the output name is derived from it.
 * @param {Array<{f: {name: string, offset: number}, data: {sentences: Array}}>} data
 *   One entry per transcribed chunk, in any order. The array is not modified.
 */
function writeOut(file, data) {
  // Sort a copy so the caller's array keeps its order.
  var ordered = data.slice().sort(function(a, b) {
    return a.f.offset - b.f.offset;
  });
  var sentences = ordered.reduce(function(all, entry) {
    return all.concat(entry.data.sentences);
  }, []);

  fs.writeFile(file + '.transcription.json', JSON.stringify(sentences), function(writeErr) {
    if (writeErr) {
      // Keep the chunk files: the transcript was not saved.
      console.log(writeErr);
      return;
    }
    if (ordered.length > 1) {
      ordered.forEach(function(entry) {
        // fs.unlink requires a callback; calling it without one throws.
        fs.unlink(entry.f.name, function(unlinkErr) {
          if (unlinkErr) {
            console.log(unlinkErr);
          }
        });
      });
    }
  });
}
/**
 * Transcribe an audio/video file end to end.
 *
 * Steps: convert to WAV (convertToWav), cut into chunks (split), send every
 * chunk to Watson in parallel (send_to_watson), normalise each response with
 * the chunk's offset (parse), and write the combined result (writeOut).
 *
 * @param {string} file - Path of the media file to transcribe.
 * @param {function(Array<{f: Object, data: Object}>)} [cb] - Optional. Called
 *   once every chunk has been transcribed, right after writeOut has been
 *   started, with one `{f, data}` entry per chunk (`f` is the chunk
 *   descriptor, `data` the parsed transcript). The output file may still be
 *   in the process of being written at that point.
 */
function transcribe(file, cb) {
  var out = [];
  convertToWav(file, function(newFile) {
    split(newFile, function(files) {
      // Responses arrive in any order; count down to detect the last one.
      var remaining = files.length;
      files.forEach(function(f) {
        send_to_watson(f.name, function(data) {
          out.push({
            f: f,
            data: parse(data, f.offset)
          });
          remaining--;
          if (remaining === 0) {
            writeOut(file, out);
            // The callback was previously accepted but never invoked.
            if (typeof cb === 'function') {
              cb(out);
            }
          }
        });
      });
    });
  });
}
// Command-line entry point: `node <script> <media-file>`.
// Only start a transcription when this file is executed directly; requiring
// it as a module must not kick one off as a side effect.
if (require.main === module) {
  var file = process.argv[2];
  if (!file) {
    // Without a path the pipeline would be run on the literal name "undefined".
    console.log('Usage: node ' + process.argv[1] + ' <media-file>');
    process.exit(1);
  }
  transcribe(file);
}
module.exports = transcribe;
@antiboredom

This comment has been minimized.

Show comment
Hide comment
@antiboredom

antiboredom Dec 28, 2015

You need sox & ffmpeg installed for this to work...

Owner

antiboredom commented Dec 28, 2015

You need sox & ffmpeg installed for this to work...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment