Skip to content

Instantly share code, notes, and snippets.

@mamuesp
Created December 8, 2015 13:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mamuesp/e08a4f9b484ab8a84748 to your computer and use it in GitHub Desktop.
Save mamuesp/e08a4f9b484ab8a84748 to your computer and use it in GitHub Desktop.
A small Node.js-based tool to extract script tags from an HTML page.
/**
*
* Created by M.MUeller-Spaeth on 07.12.15.
* Copyright 2015 by M.Mueller-Spath, fms1961@gmail.com
*
* usage: node extract.js <HTML file> <output path> <attribute name for file names>
*
*/
var fs = require('fs');
var cheerio = require('cheerio');
// Command-line arguments:
//   node extract.js <HTML file> <output path> <attribute name for file names>
var inputFile = process.argv[2];
var rawOutputPath = process.argv[3];
var tagAttr = process.argv[4];

// All three arguments are required. Without this guard a missing output path
// would be turned into the literal string "undefined/" and a missing
// attribute name would be handed to the attribute lookup as `undefined`.
if (!inputFile || !rawOutputPath || !tagAttr) {
    console.error("usage: node extract.js <HTML file> <output path> <attribute name for file names>");
    process.exit(1);
}

// Normalise the output path so that it ends in exactly one slash; file names
// are later appended to it by plain string concatenation.
var outputPath = rawOutputPath.replace(/\/+$/, "") + "/";

// Make sure the output directory exists (it is created when missing).
checkPath(outputPath, true);

console.log("The file '" + inputFile + "' will be processed.");
console.log("The path '" + outputPath + "' will be the output directory.");

// Read the whole HTML file, then hand its contents to the extractor.
fs.readFile(inputFile, function (err, data) {
    if (err) throw err;
    extractScripts(data);
});
/**
 * Checks whether `path` is an existing directory and optionally creates it.
 *
 * @param {string} path - Directory path to check.
 * @param {boolean} doCreate - When true, a missing directory is created
 *     (mode 0o755).
 * @returns {boolean} true if `path` is a directory when the function returns;
 *     false if it does not exist (and was not created), exists but is not a
 *     directory, or could not be inspected/created.
 */
function checkPath(path, doCreate) {
    try {
        // An existing entry only counts if it really is a directory.
        return fs.statSync(path).isDirectory();
    } catch (err) {
        // statSync throws when the path does not exist; that is the only
        // situation in which creating the directory makes sense.
        if (err.code !== 'ENOENT' || !doCreate) {
            return false;
        }
    }
    try {
        fs.mkdirSync(path, 0o755);
        return true;
    } catch (err) {
        // Creation failed (e.g. missing parent directory or no permission).
        return false;
    }
}
/**
 * Parses the given HTML and writes the text content of every <script> tag
 * that carries the attribute named by the global `tagAttr` into its own file,
 * `outputPath + <attribute value> + ".html"`.
 *
 * Script tags without that attribute (or with an empty value) are skipped.
 * Writes are asynchronous; each result is reported on the console.
 *
 * @param {Buffer|string} data - The HTML document to scan.
 */
function extractScripts(data) {
    var $ = cheerio.load(data);
    var scripts = $('script');

    scripts.each(function (index, element) {
        var script = $(this);
        var attrValue = script.attr(tagAttr);

        // Nothing usable as a file name: move on to the next script tag.
        if (!attrValue) {
            return;
        }

        var fileName = outputPath + attrValue + ".html";
        var content = script.text();

        fs.writeFile(fileName, content, function (writeErr) {
            if (writeErr) {
                console.log(writeErr);
                return;
            }
            console.log(`The file '${fileName}' was saved!`);
        });
    });
}
@mamuesp
Copy link
Author

mamuesp commented Dec 8, 2015

I needed a tool that extracts the data held in script tags in an HTML file. The tags had an attribute I could use as a file name, so each tag's content is written to an HTML file in the output path as `<output path>/<attribute value>.html`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment