Skip to content

Instantly share code, notes, and snippets.

@gliese1337
Created August 16, 2011 20:16
Show Gist options
  • Save gliese1337/1150054 to your computer and use it in GitHub Desktop.
Save gliese1337/1150054 to your computer and use it in GitHub Desktop.
Non-incremental parser for WebVTT files.
/*
http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
*/
/**
 * Non-incremental WebVTT parser: reads a whole WebVTT document and returns its cues.
 *
 * The first line must be "WEBVTT", optionally followed by a space or tab and
 * arbitrary text. Header lines are skipped up to the first blank line. Each cue
 * is an optional identifier line, a timing line containing "-->", and cue-text
 * lines up to the next blank line or the end of input.
 *
 * Cues whose timing line cannot be parsed are logged with console.log and
 * skipped. If the input ends inside the header, or inside an identifier or
 * timing line, parsing stops and the cues collected so far are returned.
 *
 * @param {string} input Complete text of the WebVTT file (a leading BOM is skipped).
 * @returns {Array<Object>} Cues in file order. Each cue has: id, text, start and
 *     stop (values returned by parse_timestamp), pause_on_exit, wdir, snap, line,
 *     position, size and align (the last six may be overridden by parse_settings).
 * @throws {Error} "Not WebVTT Data" if the signature line is missing or malformed.
 */
function parseWebVTT(input){
    "use strict";
    var line,l,p,cue_list=[],
        cue,cue_text,id,fields,
        time_pat = /\s*(\d*:?[0-5]\d:[0-5]\d\.\d\d\d)\s*-->\s*(\d*:?[0-5]\d:[0-5]\d\.\d\d\d)\s*(.*)/;
    //If the first character is a BYTE ORDER MARK (BOM) character, advance position to the next character in input.
    //(The comparison yields true/false; unary plus turns that into a start offset of 1 or 0.)
    l = p = +(input[0] === '\uFEFF');
    //Collect a sequence of characters that are not CR or LF characters.
    while(p<input.length && input[p]!=='\r' && input[p] !=='\n'){p++;}
    //If line is less than six characters long, then abort these steps. The file is not a WebVTT file.
    if(p-l<6){throw new Error("Not WebVTT Data");}
    line = input.substring(l,p);
    //If the first six characters do not exactly equal "WEBVTT", then abort these steps. The file is not a WebVTT file.
    //If line is more than six characters long but the seventh character is neither a U+0020 SPACE character nor a U+0009 CHARACTER TABULATION (tab) character, then abort these steps. The file is not a WebVTT file.
    if(!/^WEBVTT([\u0020\u0009].*|$)/.test(line)){throw new Error("Not WebVTT Data");}
    //If position is past the end of input, then jump to the step labeled end.
    if(p>=input.length){return cue_list;}
    do{ //Header:
        if(input[p] === '\r'){ //Skip CR
            //If position is past the end of input, then jump to the step labeled end.
            if(++p>=input.length){return cue_list;}
        }
        if(input[p] === '\n'){ //Skip LF
            if(++p>=input.length){return cue_list;}
        }
        l=p; //Collect a sequence of characters that are not CR or LF characters.
        while(input[p] !== '\r' && input[p] !== '\n'){
            if(++p>=input.length){return cue_list;}
        }
    }while(l!==p); //If line is not the empty string, then jump back to the step labeled header.
    cue_loop: do{
        //Skip CR & LF characters.
        while(input[p]==='\r' || input[p]==='\n'){
            if(++p>=input.length){break cue_loop;}
        }
        l=p; //Collect a sequence of characters that are not CR or LF characters.
        while(input[p]!=='\r' && input[p] !=='\n'){
            if(++p>=input.length){break cue_loop;}
        }
        line = input.substring(l,p);
        //If line does not contain "-->", treat it as an id & get a new line
        if(line.indexOf('-->')===-1){
            if(input[p] === '\r'){ //Skip CR
                if(++p>=input.length){break cue_loop;}
            }
            if(input[p] === '\n'){ //Skip LF
                if(++p>=input.length){break cue_loop;}
            }
            l=p; //Collect a sequence of characters that are not CR or LF characters.
            while(input[p]!=='\r' && input[p] !=='\n'){
                if(++p>=input.length){break cue_loop;}
            } //If line is the empty string, jump to the step labeled cue loop.
            if(l===p){continue cue_loop;}
            id = line; //Let cue's text track cue identifier be the previous line.
            line = input.substring(l,p);
        }else{id = '';}
        cue = { //set default cue parameters
            id:id, text:'',
            pause_on_exit:false,
            wdir:'horizontal', snap:true,
            line:'auto', position:50,
            size:100, align:'middle'
        };
        //Timings:
        try{ //Collect WebVTT cue timings and settings from line, using cue for the results.
            if(!(fields = time_pat.exec(line))){throw new Error("Invalid Timestamp Data");}
            cue.start = parse_timestamp(fields[1]);
            cue.stop = parse_timestamp(fields[2]);
            parse_settings(cue,fields[3]);
        } catch(e) {
            //A malformed cue is reported and then skipped: discard lines up to the next blank line.
            console.log(e.stack);
            do{ //Bad cue loop:
                if(input[p] === '\r'){ //Skip CR
                    if(++p>=input.length){break cue_loop;}
                }
                if(input[p] === '\n'){ //Skip LF
                    if(++p>=input.length){break cue_loop;}
                }
                l=p; //Collect a sequence of characters that are not CR or LF characters.
                while(input[p]!=='\r' && input[p] !=='\n'){
                    if(++p>=input.length){break cue_loop;}
                }
            }while(l!==p);//If line is the empty string, then jump to the step labeled cue loop.
            continue cue_loop;
        }
        cue_text = [];
        do{ //Cue text loop:
            if(input[p] === '\r'){ //Skip CR
                //If position is past the end of input, jump to cue text processing.
                if(++p===input.length){break;}
            }
            if(input[p] === '\n'){ //Skip LF
                if(++p===input.length){break;}
            }
            l=p; //Collect a sequence of characters that are not CR or LF characters.
            while(p<input.length && input[p]!=='\r' && input[p] !=='\n'){p++;}
            if(l===p){break;} //If line is the empty string, then jump to the step labeled cue text processing.
            //Replace all U+0000 NULL characters in input by U+FFFD REPLACEMENT CHARACTERs.
            //A global regex is required: replace() with a string pattern only touches the first match.
            cue_text.push(input.substring(l,p).replace(/\0/g,'\uFFFD'));
        }while(p<input.length);
        //Cue text processing:
        cue.text = cue_text.join('\n');
        //This where we ought to construct the cue-text DOM
        cue_list.push(cue); //Add cue to the text track list of cues output.
    }while(p<input.length);
    //End: The file has ended. The WebVTT parser has finished.
    return cue_list;
}
/**
 * Converts a WebVTT timestamp into a number of seconds.
 *
 * Accepts "mm:ss.ttt" (three numeric fields) or "hh:mm:ss.ttt" (four numeric
 * fields); the last field is divided by 1000, i.e. treated as milliseconds.
 *
 * @param {string} input Timestamp text.
 * @returns {number} The time in seconds (fractional part from the last field).
 * @throws {SyntaxError} "Unexpected Colon" if the text starts with ':';
 *     "Invalid Timestamp" if it does not split into 3 or 4 all-digit fields.
 */
function parse_timestamp(input){
    "use strict"; //Must be the first statement, otherwise it is not a directive.
    var ret,p,fields,i;
    if(input[0]===':'){throw new SyntaxError("Unexpected Colon");}
    fields = input.split(/[:.]/);
    //Reject malformed input up front rather than returning NaN or a value
    //built from fields that parseInt silently truncated.
    if(fields.length!==3 && fields.length!==4){throw new SyntaxError("Invalid Timestamp");}
    for(i=0;i<fields.length;i++){
        if(!/^\d+$/.test(fields[i])){throw new SyntaxError("Invalid Timestamp");}
    }
    if(fields.length===4){
        //hh:mm:ss.ttt - hours and milliseconds first, p points at the minutes field.
        ret = parseInt(fields[0],10)*3600+parseInt(fields[3],10)/1000;
        p = 1;
    }else{
        //mm:ss.ttt - milliseconds first, p points at the minutes field.
        ret = parseInt(fields[2],10)/1000;
        p = 0;
    }
    //fields[p] is minutes, the following field is seconds.
    return ret + parseInt(fields[p],10)*60 + parseInt(fields[++p],10);
}
/**
 * Applies WebVTT cue settings from a timing line to a cue, in place.
 *
 * Recognised settings are whitespace-separated tokens of the form "X:value":
 *   A:start|middle|end       -> cue.align
 *   D:vertical|vertical-lr   -> cue.wdir
 *   L:<int> or L:<0-100>%    -> cue.line (cue.snap is true for a plain number,
 *                               false for a percentage)
 *   S:<0-100>%               -> cue.size
 *   T:<0-100>%               -> cue.position
 * Unknown tokens and out-of-range or malformed values are ignored, leaving the
 * corresponding cue property untouched.
 *
 * @param {Object} cue Cue object to update.
 * @param {string} input Settings text (everything after the end timestamp).
 * @returns {undefined}
 */
function parse_settings(cue,input){
    "use strict";
    var match,value,number,
        //The setting letter must begin a token; without the (?:^|\s) anchor a
        //token such as "XA:end" would be misread as the setting "A:end".
        set_pat = /(?:^|\s)([ADLST]):(\S+)/g;
    while(!!(match = set_pat.exec(input))){
        value = match[2];
        //Each case ends with `continue`, which resumes the enclosing while loop.
        switch(match[1]){
        case 'A': //Alignment
            if(value==='start' || value==='middle' || value==='end'){cue.align=value;}
            continue;
        case 'D': //Text direction
            if(value === 'vertical' || value === 'vertical-lr'){cue.wdir = value;}
            continue;
        case 'L': //Line position
            if(/^-?\d+%?$/.test(value)){
                number = parseInt(value,10);
                if(value[value.length-1] === '%'){ //If the last character in value is %
                    if(number<0 || number>100){continue;}
                    cue.snap = false;
                }else{
                    //A plain line number snaps to lines; reset the flag in case an
                    //earlier percentage setting on the same cue cleared it.
                    cue.snap = true;
                }
                cue.line = number;
            }
            continue;
        case 'S': //Text Size
            if(/^\d+%$/.test(value)){
                number = parseInt(value,10);
                if(number>=0 && number<=100){
                    cue.size = number;
                }
            }
            continue;
        case 'T': //Text Position
            if(/^\d+%$/.test(value)){
                number = parseInt(value,10);
                if(number>=0 && number<=100){
                    cue.position = number;
                }
            }
            continue;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment