'use strict'; | |
/******************************************************************************/ | |
console.log('Requiring modules...'); | |
const fs = require('fs'); | |
const path = require('path'); | |
const readline = require('readline'); | |
const execSync = require('child_process').execFileSync; | |
const urlParser = require('url'); | |
const jsdom = require('jsdom'); | |
// Output directory: first command-line argument, or this script's own folder.
const outDir = process.argv[2] || __dirname;

// Output descriptors, opened once and kept for the lifetime of the process.
// tocFile collects the harvested links, logFile records finished index pages
// (opened 'a+' because it is also read back at startup), errFile collects
// error reports.
const tocFile = fs.openSync(path.join(outDir, 'WordSpy.toc.txt'), 'a');
const logFile = fs.openSync(path.join(outDir, 'WordSpy.toc.log'), 'a+');
const errFile = fs.openSync(path.join(outDir, 'WordSpy.toc.errors.log'), 'a');

// Queue of index pages still to fetch: one per character of the string below,
// followed by the "all by category" page.
const tocURLs = [
  ...Array.from('1abcdefghijklmnopqrstuvwxyz',
    letter => `http://www.wordspy.com/index.php?alpha=${letter}`),
  'http://www.wordspy.com/index.php?tag=all-by-category'
];

// A page counts as ready once any of these selectors matches something.
const selectorsToCheck = ['.index-words a[href]'];
// Polling interval, in milliseconds, used while waiting for those selectors.
const checkFrequency = 100;

// URL requested before the current one, and the one being requested now.
let prevURL = '';
let currURL = '';
// Set by the SIGINT handler; getDoc() exits when it sees this flag.
let terminate = false;
/******************************************************************************/ | |
// On exit: close the three output descriptors in the order they were opened,
// then sound the alert.
process.on('exit', () => {
  for (const fd of [tocFile, logFile, errFile]) {
    fs.closeSync(fd);
  }
  playAlert();
});

// Ctrl+C does not stop the process immediately; it only raises a flag that
// getDoc() checks before starting the next request.
process.on('SIGINT', () => {
  terminate = true;
});
/******************************************************************************/ | |
// Entry point. With an empty progress log the crawl starts at the first TOC
// URL; otherwise the log is read and the crawl resumes after the last URL
// recorded in it.
if (fs.fstatSync(logFile).size === 0) {
  console.log(process.title = '#');
  getDoc(tocURLs.shift());
} else {
  const rl = readline.createInterface({
    input: fs.createReadStream(null, {encoding: 'utf8', fd: logFile, autoClose: false}),
    terminal: false,
    historySize: 0
  });
  let lastLine;
  console.log('Reading the log file...');
  rl.on('line', line => {
    // Remember only the last non-blank line of the log.
    line = line.trim();
    if (line) lastLine = line;
  }).on('close', () => {
    // Drop everything up to and including the last logged URL. If that line
    // is not in the queue, indexOf() yields -1 and nothing is dropped.
    tocURLs.splice(0, tocURLs.indexOf(lastLine) + 1);
    if (tocURLs.length === 0) {
      // The log already ends with the final TOC URL: there is no next URL to
      // parse or fetch, so stop instead of passing undefined to the parser.
      console.log('Nothing left to fetch.');
      process.exit();
    }
    const nextURL = tocURLs.shift();
    const query = urlParser.parse(nextURL, true).query;
    console.log(process.title = query.alpha || query.tag);
    getDoc(nextURL);
  });
}
/******************************************************************************/ | |
/**
 * Plays an alert sound by synchronously running ffplay on a WAV file.
 *
 * Both paths are hard-coded. The alert is best-effort: if the player cannot
 * be started or exits with an error, the failure is reported on stderr and
 * swallowed, so a missing player cannot abort a retry or throw from inside
 * the 'exit' handler.
 */
function playAlert() {
  try {
    execSync(
      'f:\\BAK\\prg\\mm\\ffmpeg\\bin\\ffplay.exe',
      ['-v', 'quiet', '-nodisp', '-autoexit', '-af', 'volume=1.0',
        'c:\\WINDOWS\\Media\\Windows Ringin.wav']
    );
  } catch (err) {
    console.error(` Alert sound failed: ${err.message}`);
  }
}
/******************************************************************************/ | |
/**
 * Starts loading one index page with jsdom; checkDoc() receives the result.
 *
 * Exits the process first if termination was requested. Otherwise the
 * previous/current URL pair is shifted and the URL is echoed to the console
 * before the request is issued.
 *
 * @param {string} url - Address of the index page to load.
 */
function getDoc(url) {
  if (terminate) {
    console.log('Exit on demand.');
    process.exit();
  }

  [prevURL, currURL] = [currURL, url];
  console.log(` ${url}`);

  // Only external scripts are fetched and executed.
  // A proxy can be added to the options, e.g. proxy: 'http://127.0.0.1:8888'.
  const features = {
    FetchExternalResources: ["script"],
    ProcessExternalResources: ["script"]
  };
  jsdom.env({ url, done: checkDoc, features });
}
/******************************************************************************/ | |
/**
 * jsdom completion callback: waits for the page content, then hands it on.
 *
 * On a jsdom error the alert is played, the error is written to the error log
 * (first failure for a URL only) and the same URL is requested again.
 * Otherwise the document is polled every `checkFrequency` ms until one of
 * `selectorsToCheck` matches, and is then passed to processDoc(). After more
 * than 50 polls without a match the load is treated as failed:
 *   - if the page URL still equals the requested URL, the URL is retried;
 *   - otherwise the problem is logged and the process exits.
 *
 * The jsdom window is closed once it is no longer needed, so that pages and
 * retries do not accumulate open windows.
 *
 * @param {*} err - Error reported by jsdom, or a falsy value on success.
 * @param {object} window - jsdom window of the loaded page.
 */
function checkDoc(err, window) {
  if (err) {
    playAlert();
    // getDoc() copies currURL into prevURL on every call, so the two differ
    // only on the first failure for a URL; repeated failures are not logged.
    if (currURL !== prevURL) {
      fs.writeSync(errFile,
        `jsdom error (${new Date()}).\n${currURL}\n${err}\n`,
        null, 'utf8');
    }
    console.error(` ${err}`);
    console.error(process.title = 'jsdom error. Retrying...');
    getDoc(currURL);
    return;
  }

  const doc = window.document;
  const loc = window.location.href;
  const selector = selectorsToCheck.join(', ');
  let iter = 0;

  const checker = setInterval(() => {
    iter++;
    if (doc.querySelectorAll(selector).length) {
      clearInterval(checker);
      console.log(` ${iter * checkFrequency} ms`);
      processDoc(doc, loc);
      // processDoc() has already extracted everything it needs.
      window.close();
    } else if (iter > 50) {
      clearInterval(checker);
      console.log(` ${iter * checkFrequency} ms`);
      if (loc === currURL) {
        playAlert();
        console.error(process.title = 'HTTP error. Retrying...');
        // Release the failed page before loading a fresh copy.
        window.close();
        getDoc(currURL);
      } else {
        fs.writeSync(errFile,
          `Something wrong (${new Date()}).\n${currURL}\n`,
          null, 'utf8');
        console.error('Something wrong...');
        process.exit();
      }
    }
  }, checkFrequency);
}
/******************************************************************************/ | |
/**
 * Saves the links of one loaded index page and moves on to the next page.
 *
 * Every '.index-words a[href]' link is appended to the TOC file, one href per
 * line; for the "all by category" page the page URL is written first. The
 * page URL is then appended to the progress log. If URLs remain in the queue
 * the next one is requested, otherwise the process exits.
 *
 * @param {object} doc - Document of the loaded page.
 * @param {string} loc - URL of the loaded page.
 */
function processDoc(doc, loc) {
  const anchors = doc.querySelectorAll('.index-words a[href]');
  const hrefs = Array.from(anchors, el => el.href);

  if (loc === 'http://www.wordspy.com/index.php?tag=all-by-category') {
    fs.writeSync(tocFile, `${loc}\n`, null, 'utf8');
  }
  fs.writeSync(tocFile, `${hrefs.join('\n')}\n`, null, 'utf8');
  fs.writeSync(logFile, `${loc}\n`, null, 'utf8');
  console.log(process.title = ` Links: ${anchors.length}.`);

  if (tocURLs.length === 0) {
    process.exit();
  } else {
    const nextURL = tocURLs.shift();
    // Label the next page by its "alpha" query value, or "tag" if there is none.
    const query = urlParser.parse(nextURL, true).query;
    console.log(process.title = query.alpha || query.tag);
    getDoc(nextURL);
  }
}
/******************************************************************************/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment