@kayslay
Last active August 15, 2018 12:44
crawl.js for the "Build a CLI to crawl a web page with web-crawljs" article
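Example usage, assuming a config file named nairaland.js exists in the ./config directory next to this script (the config name is illustrative): -x picks the config to execute, -d overrides the crawl depth, and -u takes a comma-separated list of start URLs.

node crawl.js -x nairaland -d 2 -u https://www.nairaland.com/,https://www.nairaland.com/nigeria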
#!/usr/bin/env node
/**
 * Created by kayslay on 5/31/17.
 */
const crawler = require('web-crawljs');
const program = require('commander');
// commander configuration
// parse a comma-separated list of URLs
function list(val) {
    "use strict";
    return val.split(',');
}
program
    .option('-x, --execute <string>', 'the configuration to execute')
    .option('-d, --depth [number]', 'the depth of the crawl')
    .option('-u, --urls [items]', 'change the urls', list)
    .parse(process.argv);
// throw an error if the execute flag is not used
if (!program.execute) {
    throw new Error('the configuration to use must be set; use the -x flag to define it,' +
        ' or --help for help');
}
// holds the additional configuration that will be added to crawlConfig
const additionalConfig = {};
// set the object that will override the default crawlConfig
(function (config) {
    // override depth and urls if the corresponding flags were passed
    if (program.depth) config['depth'] = program.depth;
    if (program.urls) config['urls'] = program.urls;
})(additionalConfig);
// the action is the name of the config file that holds the crawlConfig
let action = program.execute;
try {
    // set the crawlConfig, adding the additional config if needed
    let crawlConfig = Object.assign(require(`./config/${action}`), additionalConfig);
    const Crawler = crawler(crawlConfig);
    Crawler.CrawlAllUrl();
} catch (err) {
    console.error(`An error occurred: ${err.message}`);
}
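For reference, a minimal sketch of what a config file loaded by this CLI might look like (the file name ./config/nairaland.js is hypothetical). The field names assume the crawlConfig shape web-crawljs uses in the accompanying article (fetchSelector/fetchSelectBy for the data to extract, nextSelector/nextSelectBy for the links to follow, fetchFn as the per-page callback); check them against the library version in use.

/**
 * ./config/nairaland.js: illustrative crawlConfig sketch; field names assume
 * the web-crawljs config described in the accompanying article.
 */
module.exports = {
    // CSS selectors for the data to extract from each crawled page
    fetchSelector: {title: "title", posts: "p"},
    // how to read each selected element (text content here)
    fetchSelectBy: {title: "text", posts: "text"},
    // selectors for the links whose pages should be crawled next
    nextSelector: {links: "a"},
    // read the href attribute of each matched link
    nextSelectBy: {links: ["attr", "href"]},
    // called with the data fetched from each page
    fetchFn: (err, data, url) => {
        if (err) return console.error(err.message);
        console.log(url, data);
    },
    // defaults; overridden by the -d and -u flags of crawl.js
    depth: 2,
    urls: ["https://www.nairaland.com/"]
};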