Created
January 16, 2016 20:52
-
-
Save vsemozhetbyt/e35d9087f154082af524 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict';
/******************************************************************************/
console.log('Requiring modules...');
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const execSync = require('child_process').execFileSync;
const urlParser = require('url');
const jsdom = require('jsdom');

// Output directory: first CLI argument, or the script's own directory.
const outDir = process.argv[2] || __dirname;

// Opens (creating if needed) a file inside outDir and returns its descriptor.
const openInOutDir = (fileName, flags) => fs.openSync(path.join(outDir, fileName), flags);

// Collected entry links, one URL per line (append-only).
const tocFile = openInOutDir('UD.toc.txt', 'a');
// URLs of pages already processed; opened readable so a later run can resume.
const logFile = openInOutDir('UD.toc.log', 'a+');
// jsdom failures (append-only).
const errFile = openInOutDir('UD.toc.errors.log', 'a');

// Browse-page "character" values still to visit: A..Z, then the URL-encoded '*'.
const abc = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '%2A'];

// Previously requested URL and the one currently in flight.
let prevURL = '';
let currURL = '';
// Raised by the SIGINT handler; checked before each new request.
let terminate = false;
/******************************************************************************/
// On exit: release the three output descriptors, then play the audible alert.
process.on('exit', () => {
  [tocFile, logFile, errFile].forEach((fd) => {
    fs.closeSync(fd);
  });
  playAlert();
});

// Ctrl+C only raises a flag; getDoc() checks it before starting the next request.
process.on('SIGINT', () => { terminate = true; });
/******************************************************************************/
// Entry point: start from the first character on a fresh run, or resume after
// the last URL recorded in the log file.
if (fs.fstatSync(logFile).size === 0) {
  console.log(process.title = 'A');
  getDoc(`http://www.urbandictionary.com/browse.php?character=${abc.shift()}`);
} else {
  const rl = readline.createInterface({
    input: fs.createReadStream(null, {encoding: 'utf8', fd: logFile, autoClose: false}),
    terminal: false,
    historySize: 0
  });
  let lastLine;
  console.log('Reading the log file...');
  rl.on('line', line => {
    // Remember the last non-blank line: it is the most recently processed URL.
    line = line.trim();
    if (line) lastLine = line;
  }).on('close', () => {
    if (!lastLine) {
      // The log holds only blank lines, so there is nothing to resume from.
      console.log(process.title = 'A');
      getDoc(`http://www.urbandictionary.com/browse.php?character=${abc.shift()}`);
      return;
    }
    // Parse the logged URL once and read both query fields from the result.
    const query = urlParser.parse(lastLine, true).query;
    const curChar = query.character;
    const nextPage = Number(query.page || 1) + 1;
    // The parsed query value is percent-decoded ('%2A' becomes '*'), while abc
    // stores the encoded form, so compare against each entry's decoded value.
    const charIndex = abc.findIndex(entry => decodeURIComponent(entry) === curChar);
    // Request with the same encoded form a fresh run would use; fall back to
    // the raw value if the logged character is not in the list.
    const charParam = charIndex === -1 ? curChar : abc[charIndex];
    // Drop every character up to and including the current one.
    abc.splice(0, charIndex + 1);
    console.log(process.title = `${curChar}: ${nextPage}`);
    getDoc(`http://www.urbandictionary.com/browse.php?character=${charParam}&page=${nextPage}`);
  });
}
/******************************************************************************/
/**
 * Plays an audible alert by running ffplay synchronously on a system sound.
 *
 * The alert is only a notification, so a missing player or a non-zero exit
 * status is reported on stderr rather than thrown. Callers use this in the
 * retry paths and in the process 'exit' handler, where an exception would
 * abort the scrape or the shutdown.
 */
function playAlert() {
  try {
    execSync(
      'f:\\BAK\\prg\\mm\\ffmpeg\\bin\\ffplay.exe',
      ['-v', 'quiet', '-nodisp', '-autoexit', '-af', 'volume=1.0',
        'c:\\WINDOWS\\Media\\Windows Ringin.wav']
    );
  } catch (alertErr) {
    console.error(`Alert sound failed: ${alertErr.message}`);
  }
}
/******************************************************************************/
/**
 * Requests one page through jsdom and hands the result to processDoc.
 *
 * Exits the process first if termination was requested. Otherwise it shifts
 * the URL history (prevURL takes the old currURL, currURL takes `url`) and
 * prints the URL before starting the request.
 *
 * @param {string} url - Address of the page to load.
 */
function getDoc(url) {
  if (terminate) {
    console.log('Exit on demand.');
    process.exit();
  }

  [prevURL, currURL] = [currURL, url];
  console.log(` ${url}`);

  // A `proxy: 'http://127.0.0.1:8888'` option, or a setTimeout delay around
  // this call, can be added here when debugging or throttling is needed.
  const request = { url: url, done: processDoc };
  jsdom.env(request);
}
/******************************************************************************/
/**
 * jsdom completion callback: stores the links found on a browse page and
 * schedules the next request.
 *
 * - On a jsdom error: plays the alert, records the error once per URL in the
 *   error file, and retries the same URL.
 * - On a page with links: appends them to the TOC file, logs the page URL,
 *   then follows the "next" pagination link, or moves to the next character,
 *   or exits when no characters are left.
 * - On a page without links: retries if the final location is still a browse
 *   URL, otherwise moves to the next character or exits.
 *
 * @param {*} err - Error reported by jsdom, falsy on success.
 * @param {object} window - jsdom window for the loaded page.
 */
function processDoc(err, window) {
  const browsePrefix = 'http://www.urbandictionary.com/browse.php?character=';

  // Starts the next character's first page, or ends the run when none remain.
  const nextCharacterOrExit = () => {
    if (abc.length) {
      const note = 'Next character...';
      process.title = note;
      console.log(note);
      getDoc(`${browsePrefix}${abc.shift()}`);
    } else {
      process.exit();
    }
  };

  if (err) {
    playAlert();
    // A retry leaves prevURL equal to currURL, so each failing URL is
    // written to the error file only on its first failure.
    if (currURL !== prevURL) {
      fs.writeSync(errFile, `jsdom error (${new Date()}).\n${currURL}\n${err}\n`, null, 'utf8');
    }
    console.error(` ${err}`);
    const retryNote = 'jsdom error. Retrying...';
    process.title = retryNote;
    console.error(retryNote);
    getDoc(currURL);
    return;
  }

  const page = window.document;
  const pageURL = window.location.href;
  const anchors = page.querySelectorAll('#columnist li a[href]');

  if (anchors.length === 0) {
    if (pageURL.startsWith(browsePrefix)) {
      // Still on a browse URL but no entries were found: treat as a failed load.
      playAlert();
      const httpNote = 'HTTP error. Retrying...';
      process.title = httpNote;
      console.error(httpNote);
      getDoc(currURL);
    } else {
      nextCharacterOrExit();
    }
    return;
  }

  const hrefs = Array.from(anchors).map(anchor => anchor.href);
  fs.writeSync(tocFile, hrefs.join('\n') + '\n', null, 'utf8');
  fs.writeSync(logFile, `${pageURL}\n`, null, 'utf8');

  const countNote = ` Links: ${anchors.length}.`;
  process.title = countNote;
  console.log(countNote);

  const nextLink = page.querySelector('#content div.pagination-centered li a[href][rel="next"]');
  if (!nextLink) {
    nextCharacterOrExit();
    return;
  }

  // Show "<character>: <page>" as the progress title for the upcoming page.
  const nextURL = nextLink.href;
  const progress = nextURL.replace(/.+?\bcharacter=/, '').replace(/&page=/, ': ');
  process.title = progress;
  console.log(progress);
  getDoc(nextURL);
}
/******************************************************************************/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Спасибо за подробную статью на Хабре!
Чтобы заработало в 2017 году, нужно установить jsdom версии 9.10.0. С более новыми версиями jsdom.env выдаёт ошибку, т. к. поменяли API jsdom. Как переписать код, так и не разобрался; установил старую версию — заработало.