Skip to content

Instantly share code, notes, and snippets.

@codeas
Last active April 26, 2021 06:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codeas/8a0edf18c8be1c02eef9e8592410a862 to your computer and use it in GitHub Desktop.
Save codeas/8a0edf18c8be1c02eef9e8592410a862 to your computer and use it in GitHub Desktop.
OSCAR and Google NLP
/**
 * Reads a subtitle file from Google Drive and groups its cue text into
 * consecutive time windows of BATCH_SIZE minutes.
 *
 * A row containing "-->" is treated as a cue timing row; the hour and minute
 * of its END timestamp (the part after "-->", expected as "HH:MM:...") decide
 * which window the cue belongs to. Rows following a timing row, up to the next
 * blank row, are the cue's text. Rows outside a cue (e.g. cue index numbers)
 * are ignored.
 *
 * Window k holds cues whose end minute lies in (BATCH_SIZE*(k-1), BATCH_SIZE*k];
 * window 0 holds cues ending in minute 0.
 *
 * @param {string} fileId ID passed to DriveApp.getFileById.
 * @return {Array} One [windowEndMinute, text] pair per non-empty window, in
 *     file order. The text is the window's cue lines joined by single spaces.
 */
var subitlesParser = function(fileId) {
  var CUE_ARROW = "-->";
  var BATCH_SIZE = 2; // width of a time window, in minutes

  var content = DriveApp.getFileById(fileId).getBlob().getDataAsString();
  var rows = content.split("\n");

  var output = [];
  var text = [];      // cue lines collected for the current window
  var epoch = 0;      // index of the current window
  var inCue = false;  // true between a timing row and the next blank row

  // Emits the current window (if it has any text) and starts a new buffer.
  var flush = function() {
    if (text.length > 0) {
      output.push([BATCH_SIZE * epoch, text.join(" ")]);
      text = [];
    }
  };

  rows.forEach(function(rawRow) {
    // trim() drops the trailing "\r" of CRLF files, so blank rows are detected
    // the same way for both LF and CRLF line endings.
    var row = rawRow.trim();

    if (row.indexOf(CUE_ARROW) > -1) {
      var endParts = row.split(CUE_ARROW)[1].split(":");
      var position = parseInt(endParts[0], 10) * 60 + parseInt(endParts[1], 10);
      // Jump straight to the cue's own window instead of advancing one window
      // per row, so labels stay correct after a gap in the subtitles.
      // An unparsable timestamp yields NaN; the comparison is then false and
      // the cue simply stays in the current window.
      var cueEpoch = Math.ceil(position / BATCH_SIZE);
      if (cueEpoch > epoch) {
        flush();
        epoch = cueEpoch;
      }
      inCue = true;
    } else if (row === "") {
      inCue = false;
    } else if (inCue) {
      text.push(row);
    }
  });

  // The last window has no following timing row to trigger its flush.
  flush();

  return output;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment