Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
'use strict';
/******************************************************************************/
console.log('Requiring modules...');
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const execSync = require('child_process').execFileSync;
const urlParser = require('url');
const jsdom = require('jsdom');
// Output directory: the first command-line argument, or the script's own
// directory when no argument is given.
const outDir = process.argv[2] || __dirname;

// Builds the full path of an output file inside outDir.
const inOutDir = fileName => path.join(outDir, fileName);

// File descriptors kept open for the whole run:
//   tocFile - collected entry links (append only);
//   logFile - URLs of pages already processed (append + read, used for resuming);
//   errFile - jsdom error reports (append only).
const tocFile = fs.openSync(inOutDir('UD.toc.txt'), 'a');
const logFile = fs.openSync(inOutDir('UD.toc.log'), 'a+');
const errFile = fs.openSync(inOutDir('UD.toc.errors.log'), 'a');

// Queue of `character` query values still to be browsed: A..Z, then '%2A'.
const abc = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '%2A'];

// URL requested before the current one / URL currently being requested.
let prevURL = '';
let currURL = '';

// Set by the SIGINT handler; checked at the start of getDoc().
let terminate = false;
/******************************************************************************/
// On exit: close the three output files in order, then play the alert sound.
process.on('exit', () => {
  for (const fd of [tocFile, logFile, errFile]) {
    fs.closeSync(fd);
  }
  playAlert();
});

// Ctrl+C does not stop the process immediately: it only raises the flag
// that getDoc() checks before issuing the next request.
process.on('SIGINT', () => { terminate = true; });
/******************************************************************************/
/*
 * Entry point: start from scratch or resume after the last logged page.
 *
 * - Empty log (or a log holding only blank lines): start with the first
 *   character in the `abc` queue.
 * - Otherwise: take the last non-blank line of the log (the URL of the last
 *   processed page), drop from `abc` every character up to and including the
 *   one in that URL, and request the page following the logged one.
 */
const startFromFirstCharacter = () => {
  console.log(process.title = 'A');
  getDoc(`http://www.urbandictionary.com/browse.php?character=${abc.shift()}`);
};

if (fs.fstatSync(logFile).size === 0) {
  startFromFirstCharacter();
} else {
  const rl = readline.createInterface({
    input: fs.createReadStream(null, {encoding: 'utf8', fd: logFile, autoClose: false}),
    terminal: false,
    historySize: 0
  });
  let lastLine;
  console.log('Reading the log file...');
  rl.on('line', line => {
    const trimmed = line.trim();
    if (trimmed) lastLine = trimmed;
  }).on('close', () => {
    // A log with only whitespace carries no progress information.
    if (lastLine === undefined) {
      startFromFirstCharacter();
      return;
    }

    const query = urlParser.parse(lastLine, true).query;
    const curChar = query.character;
    if (typeof curChar !== 'string' || curChar === '') {
      // Fail loudly instead of requesting `character=undefined`.
      throw new Error(`Cannot resume: no "character" parameter in the last log line: ${lastLine}`);
    }
    const nextPage = Number(query.page || 1) + 1;

    // The parsed query value is percent-decoded ('%2A' becomes '*'), whereas
    // `abc` stores the encoded tokens, so compare against decoded tokens.
    // A plain indexOf() would miss the '%2A' entry and leave the queue full.
    const charIndex = abc.findIndex(token => decodeURIComponent(token) === curChar);
    // Reuse the encoded token from the queue in the URL when there is one.
    const charToken = charIndex === -1 ? curChar : abc[charIndex];
    abc.splice(0, charIndex + 1);

    console.log(process.title = `${curChar}: ${nextPage}`);
    getDoc(`http://www.urbandictionary.com/browse.php?character=${charToken}&page=${nextPage}`);
  });
}
/******************************************************************************/
/**
 * Plays an audible alert by running ffplay synchronously on a Windows
 * system sound (blocks until the player exits).
 *
 * The alert is best-effort: if the player or the sound file is missing, or
 * the player fails, the problem is reported on stderr and the function
 * returns normally, so a cosmetic notification can never abort the scraper
 * (it is called from the retry paths and from the 'exit' handler).
 */
function playAlert() {
  try {
    execSync(
      'f:\\BAK\\prg\\mm\\ffmpeg\\bin\\ffplay.exe',
      ['-v', 'quiet', '-nodisp', '-autoexit', '-af', 'volume=1.0',
        'c:\\WINDOWS\\Media\\Windows Ringin.wav']
    );
  } catch (err) {
    console.error(`Cannot play the alert sound: ${err.message}`);
  }
}
/******************************************************************************/
/**
 * Requests a page and hands the result to processDoc(err, window).
 *
 * Exits the process first if termination was requested (SIGINT). Updates
 * prevURL/currURL so that processDoc can tell a first failure from a retry.
 *
 * Uses the legacy callback API `jsdom.env` when the installed jsdom still
 * provides it, and falls back to the promise-based `JSDOM.fromURL` of newer
 * jsdom releases, where `jsdom.env` no longer exists.
 *
 * @param {string} url - Absolute URL of the page to load.
 */
function getDoc(url) {
  if (terminate) {
    console.log('Exit on demand.');
    process.exit();
  }
  prevURL = currURL;
  currURL = url;
  console.log(` ${url}`);

  if (typeof jsdom.env === 'function') {
    jsdom.env({ url, done: processDoc });
    return;
  }

  jsdom.JSDOM.fromURL(url)
    .then(
      dom => processDoc(null, dom.window),
      err => processDoc(err)
    )
    .catch(err => {
      // An exception thrown by processDoc itself must crash the process, as
      // it did with the callback API, rather than become a silent rejection.
      process.nextTick(() => { throw err; });
    });
}
/******************************************************************************/
/**
 * Moves on to the next character in the `abc` queue, or exits the process
 * when the queue is empty.
 */
function advanceToNextCharacter() {
  if (abc.length) {
    console.log(process.title = 'Next character...');
    getDoc(`http://www.urbandictionary.com/browse.php?character=${abc.shift()}`);
  } else {
    process.exit();
  }
}

/**
 * Handles the outcome of one page request.
 *
 * On error: plays the alert, records the error in the error log (only the
 * first time for a given URL; retries of the same URL are not logged again)
 * and retries the current URL.
 *
 * On success: appends the entry links to the TOC file and the page URL to
 * the progress log, then follows the "next" pagination link, or moves to the
 * next character, or exits when nothing is left. A page without entry links
 * that is still a browse page is treated as a failed load and retried.
 *
 * @param {*} err - Error reported by jsdom, or a falsy value on success.
 * @param {Window} [window] - jsdom window of the loaded page.
 */
function processDoc(err, window) {
  if (err) {
    // NOTE(review): the legacy jsdom API may pass a window together with an
    // error; release it if so.
    if (window) window.close();
    playAlert();
    if (currURL !== prevURL) {
      fs.writeSync(errFile,
        `jsdom error (${new Date()}).\n${currURL}\n${err}\n`,
        null, 'utf8');
    }
    console.error(` ${err}`);
    console.error(process.title = 'jsdom error. Retrying...');
    getDoc(currURL);
    return;
  }

  // Extract everything needed as plain strings, then release the window:
  // thousands of pages are visited in one run and an unclosed jsdom window
  // keeps its whole document alive.
  const doc = window.document;
  const loc = window.location.href;
  const hrefs = Array.from(doc.querySelectorAll('#columnist li a[href]'), el => el.href);
  const nextLink = doc.querySelector('#content div.pagination-centered li a[href][rel="next"]');
  const nextHref = nextLink ? nextLink.href : null;
  window.close();

  if (hrefs.length) {
    fs.writeSync(tocFile, `${hrefs.join('\n')}\n`, null, 'utf8');
    fs.writeSync(logFile, `${loc}\n`, null, 'utf8');
    console.log(process.title = ` Links: ${hrefs.length}.`);
    if (nextHref) {
      console.log(process.title =
        nextHref.replace(/.+?\bcharacter=/, '').replace(/&page=/, ': '));
      getDoc(nextHref);
    } else {
      advanceToNextCharacter();
    }
  } else if (loc.startsWith('http://www.urbandictionary.com/browse.php?character=')) {
    // Still on a browse page but no entries were found: retry the request.
    playAlert();
    console.error(process.title = 'HTTP error. Retrying...');
    getDoc(currURL);
  } else {
    advanceToNextCharacter();
  }
}
/******************************************************************************/
@radioaktive

This comment has been minimized.

Copy link

@radioaktive radioaktive commented Dec 9, 2017

Спасибо за подробную статью на Хабре!
Чтобы заработало в 2017 году, нужно установить jsdom версии 9.10.0. С более новыми версиями jsdom.env выдаёт ошибку, т. к. поменяли API jsdom. Как переписать код, так и не разобрался; установил старую версию — заработало.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment