Skip to content

Instantly share code, notes, and snippets.

@g6123
Created May 4, 2015 05:48
Show Gist options
  • Save g6123/4a275b6fa7194c5ad2a1 to your computer and use it in GitHub Desktop.
Save g6123/4a275b6fa7194c5ad2a1 to your computer and use it in GitHub Desktop.
smi2vtt.js
var async = require('async');
var fs = require('fs');
var os = require('os');
var detect_encoding = require('detect-encoding');
var Iconv = require('iconv').Iconv;
/**
 * Splits a markup string into fragments, one per occurrence of an opening tag.
 *
 * The string is cut at every case-insensitive occurrence of '<'+tagname. Each
 * non-empty (after trimming) piece is returned re-prefixed with
 * '<'+tagname+' ', so the tag name comes back in the case given by `tagname`.
 * Text that precedes the first occurrence is returned as the first fragment
 * with the same prefix; callers that do not want it must drop it themselves.
 *
 * @param {string} string - markup to split.
 * @param {string} tagname - tag name without the angle bracket, e.g. 'sync'.
 * @param {Function} callback - called asynchronously as callback(null, fragments).
 */
var split_by_tag = function(string, tagname, callback){
  var pattern = '<'+tagname;
  // Escape regex metacharacters so the tag name is always matched literally.
  var escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The word boundary keeps '<p' from also cutting at '<pre', '<param', ...
  var pieces = string.split(new RegExp(escaped+'\\b', 'gi'));
  var fragments = [];

  pieces.forEach(function(piece){
    piece = piece.trim();
    if(piece){
      fragments.push(pattern+' '+piece);
    }
  });

  // Plain array code replaces async.map/async.filter: the truth-value callback
  // style of async.filter differs between major versions of the async library.
  // The result is still delivered on a later tick so callers never observe a
  // synchronous callback.
  setImmediate(function(){
    callback(null, fragments);
  });
};
/**
 * Left-pads a value with '0' characters up to a minimum width.
 *
 * Values that are already at least `digit` characters wide are returned
 * unchanged (as a string); significant leading digits are never cut off.
 *
 * @param {number|string} number - value to pad.
 * @param {number} digit - minimum width of the result.
 * @returns {string} the zero-padded text.
 */
var zerofill = function(number, digit){
  var text = String(number);
  while(text.length < digit){
    text = '0'+text;
  }
  return text;
};
/**
 * Formats a millisecond offset as an 'hh:mm:ss.ttt' timestamp.
 *
 * All arithmetic is done on whole milliseconds so the fractional part is
 * always exactly three digits (1500 -> '00:00:01.500').
 *
 * @param {number|string} ms - offset in milliseconds; numeric strings are accepted.
 * @returns {string} the formatted timestamp. Non-numeric or negative input is
 *   rendered as '00:00:00.000'.
 */
var ms2stamp = function(ms){
  var total = Math.floor(Number(ms));
  if(!isFinite(total) || total < 0){
    total = 0;
  }

  var h = Math.floor(total/3600000);
  var m = Math.floor(total/60000)%60;
  var s = Math.floor(total/1000)%60;
  var millis = total%1000;

  var result = [zerofill(h, 2), zerofill(m, 2), zerofill(s, 2)].join(':');
  result += ('.'+zerofill(millis, 3));
  return result;
};
module.exports = function(file, update, log, callback){
update({ status: 'started', detail: null });
async.waterfall([
function(callback){
fs.readFile(file[0], function(error, buffer){
if(error){
callback('원본 자막 파일에 접근할 수 없습니다.', null);
} else {
callback(null, buffer);
}
});
},
function(buffer, callback){
detect_encoding(buffer, function(error, encoding){
if(error){
callback('원본 자막 파일의 인코딩을 감지할 수 없습니다.', null);
} else {
encoding = encoding.toLowerCase();
if(encoding === 'euc-kr'){
encoding = 'cp949';
}
callback(null, buffer, encoding);
}
});
},
function(buffer, encoding, callback){
var iconv = new Iconv(encoding, 'utf-8');
callback(null, iconv.convert(buffer).toString());
},
function(smi, callback){
split_by_tag(smi, 'sync', callback);
},
function(sync_list, callback){
sync_list.splice(0, 1);
sync_list.push(sync_list.pop().replace(/<( +)?\/( +)?body>/i, '').trim());
callback(null, sync_list);
},
function(sync_list, callback){
async.map(sync_list, function(item, callback){
var time = item.match(/<sync(.+)start=([0-9]+)(.+)?>/i)[2];
item = item.replace(/<sync[^>]+>/i, '');
split_by_tag(item, 'p', function(error, result){
if(error){
callback(error, null);
} else {
callback(null, [time, result]);
}
});
}, callback);
},
function(list, callback){
var parsed_sub = {};
async.each(list, function(item, callback){
var time = item[0];
var p_list = item[1];
async.each(p_list, function(item, callback){
var lang = item.match(/<p(.+)class=([a-z]+)(.+)?>/i)[2];
var content = item.split(/<p[^>]+>/i)[1].trim();
content = content.replace(/(\r\n|\n|\r)/g, '');
content = content.replace(/<br( +)?\/?( +)?>/gi, '\n');
content = content.replace(/<[^>]+>/g, '');
if(!parsed_sub[time]){
parsed_sub[time] = {};
}
parsed_sub[time][lang] = content;
callback(null);
}, callback);
}, function(error){
if(error){
callback(error, null);
} else {
callback(null, parsed_sub);
}
});
},
function(parsed_sub, callback){
var vtt_sub = 'WEBVTT\n';
var sub_index = 1;
var ms_list = Object.keys(parsed_sub);
ms_list.sort(function(one, another){ return one-another; });
ms_list.forEach(function(ms, ms_index){
var content = parsed_sub[ms]['KRCC'].trim();
if(content && content !== '&nbsp;'){
vtt_sub += ('\n'+sub_index);
vtt_sub += ('\n'+ms2stamp(ms)+' --> ');
if(ms_index === ms_list.length-1){
vtt_sub += ms2stamp(ms+5000);
} else {
vtt_sub += ms2stamp(ms_list[ms_index+1]);
}
vtt_sub += ('\n'+content+'\n');
sub_index++;
}
});
vtt_sub = vtt_sub.trim();
callback(null, vtt_sub);
},
function(vtt_sub, callback){
fs.writeFile(file[1], vtt_sub, function(error){
if(error){
callback('변환한 자막 파일을 저장할 수 없습니다.');
} else {
callback(null);
}
});
}
], function(error){
if(error){
var msg = '원본 자막 파일의 SAMI 문법을 분석하는 데 실패했습니다.';
msg += ' (';
msg += error.message;
msg += ')';
error.message = msg;
callback(error);
} else {
callback(null);
}
});
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment