Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Transcribe video/audio using IBM Watson
var request = require('request');
var fs = require('fs');
var sox = require('sox');
var spawn = require('child_process').spawn;
// Credentials for the Watson service; send_to_watson passes them as the
// `auth` user/pass of each request. They are left empty in this file.
var WATSON_USER = '';
var WATSON_PASS = '';
// Endpoint that send_to_watson POSTs the audio data to.
var url = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize';
/**
 * Convert a media file to a mono, 16 kHz, 16-bit PCM WAV file using ffmpeg.
 *
 * The output is written next to the input as `<file>.temp.wav`. When that file
 * already exists it is reused and ffmpeg is not run.
 *
 * @param {string} file - Path of the source audio/video file.
 * @param {function(string, Error=)} cb - Called exactly once with the WAV path.
 *   If ffmpeg could not be started or did not exit with code 0, an Error is
 *   passed as a second argument; the WAV file may then be missing or incomplete.
 */
function convertToWav(file, cb) {
  var aud_file = file + '.temp.wav';
  // fs.exists is deprecated; fs.access reports a missing file through its
  // error argument instead.
  fs.access(aud_file, function(accessErr) {
    if (!accessErr) {
      cb(aud_file);
      return;
    }

    var finished = false;
    function finish(err) {
      // A failed spawn emits 'error' and may emit 'close' as well; make sure
      // the caller is only notified once.
      if (finished) {
        return;
      }
      finished = true;
      if (err) {
        console.log(err);
        cb(aud_file, err);
      } else {
        cb(aud_file);
      }
    }

    var args = ['-i', file, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', aud_file];
    // Nothing reads ffmpeg's output here, so do not create pipes for it: an
    // undrained pipe can fill up and stall the child on long inputs.
    var convert = spawn('ffmpeg', args, { stdio: 'ignore' });
    // Without an 'error' listener a missing ffmpeg binary would crash the
    // process with an unhandled 'error' event.
    convert.on('error', finish);
    convert.on('close', function(code, signal) {
      if (code === 0) {
        finish(null);
      } else {
        finish(new Error('ffmpeg exited with code ' + code + ' (signal ' + signal + ')'));
      }
    });
  });
}
/**
 * Split a WAV file into chunks of at most five minutes using sox.
 *
 * The duration is read with sox.identify. A file no longer than five minutes
 * is not split and is reported as a single chunk pointing at `file` itself.
 * Longer files are cut into `<file>.<startSeconds>.wav` pieces, one sox
 * process per piece, all started at once.
 *
 * @param {string} file - Path of the WAV file to split.
 * @param {function(Array<{name: string, offset: number}>, Error=)} cb - Called
 *   once with the chunk list (`offset` is the chunk start in seconds). Chunks
 *   are listed in the order their sox process finished, not by offset. If the
 *   duration cannot be determined the list is empty and the error is passed
 *   as a second argument.
 */
function split(file, cb) {
  // split into 5 minute chunks
  var maxLength = 60 * 5;
  var total = 0;
  var files = [];

  // Record one finished chunk and report back once every chunk is accounted for.
  function finishedSplit(filename, start) {
    files.push({
      name: filename,
      offset: start
    });
    total--;
    if (total === 0) {
      cb(files);
    }
  }

  // Start a sox process that writes the chunk beginning at `start` seconds.
  function spawnChunk(start) {
    var filePart = file + '.' + start + '.wav';
    var settled = false;
    function settle() {
      // A failed spawn can emit both 'error' and 'close'; count the chunk once.
      if (settled) {
        return;
      }
      settled = true;
      finishedSplit(filePart, start);
    }
    var s = spawn('sox', [file, filePart, 'trim', '' + start, '' + maxLength]);
    // Without this listener a missing sox binary would crash the process.
    s.on('error', function(spawnErr) {
      console.log(spawnErr);
      settle();
    });
    s.on('close', settle);
  }

  sox.identify(file, function(err, results) {
    if (err || !results) {
      var failure = err || new Error('sox.identify returned no results for ' + file);
      console.log(failure);
      cb([], failure);
      return;
    }
    var duration = results.duration;
    if (duration > maxLength) {
      for (var start = 0; start < duration; start += maxLength) {
        total++;
        spawnChunk(start);
      }
    } else {
      // Short enough to send as-is: the only "chunk" is the input file.
      cb([{
        name: file,
        offset: 0
      }]);
    }
  });
}
/**
 * Read an audio file and POST it to the Watson recognize endpoint (`url`),
 * authenticating with WATSON_USER / WATSON_PASS.
 *
 * @param {string} file - Path of the WAV file to upload.
 * @param {function(Object, Error=)} [callback] - Called once with the parsed
 *   JSON response. If the file cannot be read, the request fails, or the
 *   response body is not valid JSON, it is called with an empty result set
 *   (`{ results: [] }`) and the error as a second argument, so callers that
 *   count completed uploads still finish.
 */
function send_to_watson(file, callback) {
  // Deliver a result to the optional callback.
  function deliver(result, err) {
    if (typeof callback !== 'function') {
      return;
    }
    if (err) {
      callback(result, err);
    } else {
      callback(result);
    }
  }

  fs.readFile(file, function(error, data) {
    if (error) {
      console.log(error);
      deliver({ results: [] }, error);
      return;
    }
    var options = {
      headers: {
        'Content-Type': 'audio/wav',
        'Transfer-Encoding': 'chunked'
      },
      qs: {
        timestamps: 'true',
        continuous: 'true',
        inactivity_timeout: '-1'
      },
      auth: {
        user: WATSON_USER,
        pass: WATSON_PASS
      },
      body: data
    };
    request.post(url, options, function(error, response, body) {
      if (error) {
        // On a transport failure there is no body to parse.
        console.log(error);
        deliver({ results: [] }, error);
        return;
      }
      var parsed;
      try {
        parsed = JSON.parse(body);
      } catch (parseError) {
        // e.g. an HTML error page from a proxy instead of a JSON response.
        console.log(parseError);
        deliver({ results: [] }, parseError);
        return;
      }
      deliver(parsed);
    });
  });
}
/**
 * Reduce a recognize response to a list of sentences with per-word timings.
 *
 * Only the first alternative of every result is used. Each timestamp triple
 * `[word, start, end]` becomes `{ word, start, end }`, with `offset` added to
 * both times so that chunks cut from a longer file line up on one timeline.
 *
 * @param {Object} data - Parsed response; expected to carry a `results` array.
 *   A response without one (for example an error object) yields no sentences.
 * @param {number} [offset=0] - Amount added to every start/end time.
 * @returns {{sentences: Array<{transcript: *, confidence: *, words: Array}>}}
 */
function parse(data, offset) {
  if (typeof offset === 'undefined') offset = 0;
  var results = (data && data.results) || [];
  return {
    sentences: results.map(function(r) {
      var best = r.alternatives[0];
      // Tolerate an alternative that carries no word timings.
      var stamps = best.timestamps || [];
      return {
        transcript: best.transcript,
        confidence: best.confidence,
        words: stamps.map(function(t) {
          return {
            word: t[0],
            start: t[1] + offset,
            end: t[2] + offset
          };
        })
      };
    })
  };
}
/**
 * Write the combined transcription to `<file>.transcription.json` and remove
 * the chunk files afterwards.
 *
 * `data` is sorted in place by chunk offset, then the sentences of all entries
 * are concatenated in that order and serialized as one JSON array. Chunk files
 * are only deleted when there is more than one entry; a single entry is left
 * on disk (in this file that entry is the un-split input of split()).
 *
 * @param {string} file - Base path; `.transcription.json` is appended.
 * @param {Array<{f: {name: string, offset: number}, data: {sentences: Array}}>} data
 *   One entry per transcribed chunk.
 * @param {function(?Error)} [cb] - Optional; called once the write and all
 *   deletions have completed, with the write error or null.
 */
function writeOut(file, data, cb) {
  var out = [];
  data.sort(function(a, b) {
    return a.f.offset - b.f.offset;
  });
  data.forEach(function(d) {
    out = out.concat(d.data.sentences);
  });

  function finish(err) {
    if (typeof cb === 'function') {
      cb(err || null);
    }
  }

  fs.writeFile(file + '.transcription.json', JSON.stringify(out), function(writeErr) {
    if (writeErr) {
      console.log(writeErr);
    }
    if (data.length <= 1) {
      finish(writeErr);
      return;
    }
    var pending = data.length;
    data.forEach(function(d) {
      // fs.unlink requires a callback; omitting it throws on current Node versions.
      fs.unlink(d.f.name, function(unlinkErr) {
        if (unlinkErr) {
          console.log(unlinkErr);
        }
        pending--;
        if (pending === 0) {
          finish(writeErr);
        }
      });
    });
  });
}
/**
 * Transcribe an audio/video file: convert it to WAV, split it into chunks,
 * send every chunk to Watson, and hand the combined result to writeOut.
 *
 * All chunks are uploaded concurrently; results are collected as they arrive.
 *
 * @param {string} file - Path of the media file to transcribe.
 * @param {function(Array<{f: Object, data: Object}>)} [cb] - Optional; called
 *   once every chunk has been processed, with one `{ f, data }` entry per
 *   chunk (`f` is the chunk descriptor from split, `data` the output of
 *   parse). It is called right after writeOut has been started, so the JSON
 *   file may not be fully written yet. When split yields no chunks, nothing is
 *   written and the callback receives an empty array.
 */
function transcribe(file, cb) {
  var out = [];
  var total = 0;

  function finish() {
    if (typeof cb === 'function') {
      cb(out);
    }
  }

  convertToWav(file, function(newFile) {
    split(newFile, function(files) {
      total = files.length;
      if (total === 0) {
        // Nothing to upload, so the countdown below would never reach zero.
        finish();
        return;
      }
      files.forEach(function(f) {
        send_to_watson(f.name, function(data) {
          out.push({
            f: f,
            data: parse(data, f.offset)
          });
          total--;
          if (total === 0) {
            writeOut(file, out);
            finish();
          }
        });
      });
    });
  });
}
// Command-line entry point: `node <this script> <media file>`.
// Only run when this file is executed directly; loading it with require()
// must not start a transcription of whatever process.argv[2] happens to be.
if (require.main === module) {
  var file = process.argv[2];
  if (file) {
    transcribe(file);
  } else {
    console.log('Usage: node ' + process.argv[1] + ' <audio-or-video-file>');
    process.exitCode = 1;
  }
}
module.exports = transcribe;
@antiboredom

This comment has been minimized.

Copy link
Owner Author

commented Dec 28, 2015

You need sox & ffmpeg installed for this to work...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.