Skip to content

Instantly share code, notes, and snippets.

@xfsnowind
Created December 29, 2016 00:19
Show Gist options
  • Save xfsnowind/d506ea17e9dbb66413df126801270ded to your computer and use it in GitHub Desktop.
Save xfsnowind/d506ea17e9dbb66413df126801270ded to your computer and use it in GitHub Desktop.
Last ned de norske undertekstene til TV-serien "Skam" sesong 1-3 og lagre dem som srt-filer
#!/usr/bin/env node
var request = require('request'),
fs = require("fs");
// One row per season: [season number, program id prefix, program id suffix, episode count].
// The subtitle URL for an episode is built as prefix + two-digit episode number + suffix.
var skamSesongSubtitles = [
  ["1", "MSUB1912", "16AW", 11],
  ["2", "MYNT1500", "16AA", 12],
  ["3", "MYNT1520", "16AA", 10]
];
/**
 * Download one episode's subtitle page, convert it with `handlerFunc` and
 * write the converted text to
 * `<savingFolder>/sesong <sesongNum>/Skam.S0<sesongNum>E<episode, two digits>.srt`.
 *
 * The work is asynchronous and nothing is returned; progress and failures are
 * reported on the console. The target directory is not created here, so it
 * must already exist (a missing directory is reported as a write failure).
 *
 * @param {string|number} sesongNum   season number, used in the path and log messages
 * @param {number}        episode     episode number (zero-padded to two digits in the file name)
 * @param {string}        url         address the subtitle markup is fetched from
 * @param {string}        savingFolder root folder the file is written under
 * @param {function(string): string} handlerFunc converts the response body to the file content
 */
function getSubtitleContent (sesongNum, episode, url, savingFolder, handlerFunc) {
  console.log("downloading the subtitle of ", episode, " in sesong ", sesongNum);

  // Zero-pad single-digit episode numbers so files sort naturally (E01 ... E12).
  var episodeStr = episode < 10 ? "0" + episode : String(episode);

  // Calculate the path of the file to save.
  var filePath = savingFolder + "/sesong " + sesongNum + "/Skam.S0" + sesongNum + "E" + episodeStr + ".srt";

  // Send the request.
  request(url, function (error, response, body) {
    if (error) {
      console.error("failed to download subtitle of ", episode, " in sesong ", sesongNum, " with url: ", url, " - ", error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error("failed to download subtitle of ", episode, " in sesong ", sesongNum, " with url: ", url, " - status code ", response.statusCode);
      return;
    }

    // A handler failure on one episode must not abort the other pending downloads.
    var result;
    try {
      result = handlerFunc(body);
    } catch (handlerError) {
      console.error("failed to convert subtitle of ", episode, " in sesong ", sesongNum, " - ", handlerError);
      return;
    }
    console.log("successfully get subtitle of ", episode, " in sesong ", sesongNum, " with url: ", url);

    // Save the handled result as a file. The callback is mandatory for
    // fs.writeFile, and success is only reported once the write has finished.
    fs.writeFile(filePath, result, function (writeError) {
      if (writeError) {
        console.error("failed to save subtitle of ", episode, " in sesong ", sesongNum, " to ", filePath, " - ", writeError);
        return;
      }
      console.log("successfully saving subtitle of ", episode, " in sesong ", sesongNum, " to ", filePath);
    });
  });
}
/**
 * Convert the downloaded subtitle markup into SRT-formatted text.
 *
 * The markup is cut at every closing `</p>` that is directly followed by a
 * carriage return; the first and the last piece of that cut are discarded.
 * Each remaining piece must contain a `<p data-begin="...">` tag: its
 * attribute value becomes the cue start time (dots replaced by commas) and the
 * text after the tag becomes the cue text. A cue ends where the next cue
 * starts; the last cue ends at its own start time.
 *
 * @param {string} htmlContent subtitle markup as returned by the server
 * @returns {string} numbered SRT cues separated by blank lines ("" when no cue is found)
 */
function handleSubtitle (htmlContent) {
  var pieces = htmlContent.split(/\r?\n?\s*<\/p>\r/);
  var paragraphs = pieces.slice(1, pieces.length - 1);

  // Pull the start time and the plain text out of every paragraph.
  var cues = paragraphs.map(function (paragraph) {
    var withLineBreaks = paragraph.replace(/\r?\n?\s*<br \/>/g, "\n");
    var withoutSpanOpen = withLineBreaks.replace(/\r\n\s+<span style="italic">/g, "");
    var withoutSpans = withoutSpanOpen.replace(/\s+<\/span>/g, "");
    // Splitting on a pattern with capture groups yields [before, time, text, after].
    var fields = withoutSpans.split(/\s+<p data-begin="([\d\.:]+)">([^<]*)/g);
    return {
      start: fields[1].replace(/\./g, ","),
      text: fields[2]
    };
  });

  // Render each cue as "index / start --> end / text".
  var blocks = cues.map(function (cue, position) {
    var following = cues[position + 1];
    var end = following ? following.start : cue.start;
    return (position + 1) + "\n" + cue.start + " --> " + end + "\n" + cue.text;
  });

  return blocks.join("\n\n");
}
// Entry point: start the download of every episode of every listed season.
// The output folder can be given as the first command line argument; without
// one the original hard-coded folder is used.
skamSesongSubtitles.forEach(function (item) {
  var savingFolder = process.argv[2] || "/Users/fengxue/Media/Movies/skam";
  var sesongNum = item[0];
  var idPrefix = item[1];
  var idSuffix = item[2];
  var episodeCount = item[3];

  for (var i = 1; i <= episodeCount; i++) {
    // The program id embeds the episode number as two digits.
    var episodeStr = i < 10 ? "0" + i : String(i);
    var url = "https://tv.nrk.no/programsubtitles/" + idPrefix + episodeStr + idSuffix + "/html";
    getSubtitleContent(sesongNum, i, url, savingFolder, handleSubtitle);
  }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment