Skip to content

Instantly share code, notes, and snippets.

@rhodey
Created October 21, 2014 08:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rhodey/a0527cba9a7b60e94fda to your computer and use it in GitHub Desktop.
Save rhodey/a0527cba9a7b60e94fda to your computer and use it in GitHub Desktop.
A quick & dirty script to monitor the "ST LOUIS COPTALK" forum for deleted messages (http://members.boardhost.com/stlouiscoptalk/index.html). A list of message ids is saved to a file named "<process.argv[2]>.<Date.now()>"; run this as a cron job and then identify deleted messages by diffing successive files.
var fs = require('fs');
var split = require('split');
var http = require('http');
// Mutable state shared by the scraper functions in this script.

// URLs of the index pages that list messages.
var pages = [];
// Message ids collected from the index pages.
var messageIds = [];
// How many index pages have finished being scraped so far.
var pageScrapeCount = 0;

// Output file name: "<first command-line argument>.<current epoch milliseconds>".
var msgIdFileName = process.argv[2] + '.' + Date.now();
// Stream the collected message ids are written to, one id per line.
var msgIdFile = fs.createWriteStream(msgIdFileName);
/**
 * Returns the distinct values of an array, sorted with the default
 * `Array.prototype.sort` ordering.
 *
 * The input array is not modified: a shallow copy is sorted and every element
 * that is strictly equal to its predecessor is dropped. Intended for arrays
 * of a single primitive type (this script only passes arrays of strings).
 *
 * @param {Array} a - Values to de-duplicate; left untouched.
 * @returns {Array} A new sorted array without adjacent duplicates.
 */
function getUnique(a) {
  // Sort a copy so that callers holding a reference to `a` do not see it reordered.
  var sorted = a.slice().sort();
  return sorted.filter(function (item, pos) {
    // After sorting, equal values sit next to each other, so comparing with
    // the previous element is enough to detect a duplicate.
    return pos === 0 || item !== sorted[pos - 1];
  });
}
/**
 * Scans one line of the forum index HTML for links to further index pages
 * (`<a href="index-N.html"`) and appends the URL of each one to the global
 * `pages` array.
 *
 * The whole run of digits following `index-` is used as the page number, so
 * multi-digit pages such as `index-12.html` are handled. Anchors with no digit
 * directly after `index-` are ignored. Matches are walked in a loop rather
 * than by recursion, so a very long line cannot exhaust the call stack.
 *
 * @param {number} offset - Position in `line` at which to start searching.
 * @param {string} line - One line of HTML.
 */
function addPagesFromLine(offset, line) {
  var marker = '<a href="index-';
  var index = line.indexOf(marker, offset);

  while (index >= 0) {
    var start = index + marker.length;
    var end = start;
    var ch = line.charAt(end);

    // Consume every consecutive digit; charAt() yields '' past the end of the
    // line, which fails the range check and stops the scan.
    while (ch >= '0' && ch <= '9') {
      end += 1;
      ch = line.charAt(end);
    }

    if (end > start) {
      var pageNumber = line.substring(start, end);
      pages.push('http://members.boardhost.com/stlouiscoptalk/index-' + pageNumber + '.html');
    }

    index = line.indexOf(marker, index + 1);
  }
}
/**
 * Collects the URLs of all index pages of the forum into the global `pages`.
 *
 * The main index URL is recorded first, then the main index page is fetched
 * and every line of the response is scanned for links to further index pages.
 * When the response ends, `pages` is replaced by its de-duplicated form and
 * `cb` is invoked with no arguments. If the request emits an error, the
 * message is written to stderr and `cb` is not invoked.
 *
 * @param {Function} cb - Called once the list of pages is complete.
 */
function handleGetPages(cb) {
  var indexUrl = 'http://members.boardhost.com/stlouiscoptalk/index.html';

  // Scan the response line by line and finish once the stream ends.
  function onResponse(res) {
    var lineStream = res.pipe(split());

    lineStream.on('data', function (text) {
      addPagesFromLine(0, text);
    });

    lineStream.on('end', function () {
      pages = getUnique(pages);
      cb();
    });
  }

  function onRequestError(err) {
    process.stderr.write("Got error: " + err.message);
  }

  pages.push(indexUrl);
  http.get(indexUrl, onResponse).on('error', onRequestError);
}
/**
 * Scans one line of an index page for message links (`a href="msg/ID.html"`)
 * and appends each ID to the global `messageIds` array.
 *
 * The ID is the text between `msg/` and the next `.html`. Candidates that
 * contain a space are discarded, since they span more than a single link.
 * Matches are walked in a loop rather than by recursion, so a page delivered
 * as one very long line cannot exhaust the call stack.
 *
 * @param {number} offset - Position in `line` at which to start searching.
 * @param {string} line - One line of HTML.
 */
function addMessageIdsFromLine(offset, line) {
  var marker = 'a href="msg/';
  var startIndex = line.indexOf(marker, offset);

  while (startIndex >= 0) {
    var idStart = startIndex + marker.length;
    var endIndex = line.indexOf('.html', idStart);

    if (endIndex < 0) {
      // No '.html' follows this link, so neither it nor any later link on the
      // line can be complete; stop instead of relying on substring() quirks.
      return;
    }

    var msgId = line.substring(idStart, endIndex);
    if (msgId.indexOf(' ') < 0) {
      messageIds.push(msgId);
    }

    startIndex = line.indexOf(marker, startIndex + 1);
  }
}
/**
 * Fetches one index page and collects the message ids it links to into the
 * global `messageIds`.
 *
 * Every line of the response is scanned for message links. When the response
 * ends, `messageIds` is replaced by its de-duplicated form and `cb` is invoked
 * with no arguments. If the request emits an error, the message is written to
 * stderr and `cb` is not invoked for this page.
 *
 * @param {string} page - URL of the index page to fetch.
 * @param {Function} cb - Called once this page has been fully scanned.
 */
function handleGetMessageIdsFromPage(page, cb) {
  process.stdout.write('getting message ids from page ' + page + '\n');
  http.get(page, function (res) {
    var lines = res.pipe(split());
    lines.on('data', function (line) {
      addMessageIdsFromLine(0, line);
    });
    lines.on('end', function () {
      messageIds = getUnique(messageIds);
      cb();
    });
  }).on('error', function (e) {
    // Report through stderr.write directly; `process.stderr.write.log` does
    // not exist and would throw a TypeError instead of reporting the error.
    process.stderr.write("Got error: " + e.message);
  });
}
/**
 * Completion callback for a single scraped index page.
 *
 * Counts finished pages in the global `pageScrapeCount`. Once the count
 * reaches `pages.length`, every collected message id is written to the output
 * stream `msgIdFile`, one per line, and the stream is ended so the data is
 * flushed and the file is closed.
 */
function handlePageScraped() {
  pageScrapeCount += 1;
  if (pageScrapeCount < pages.length) {
    return;
  }

  // Iterate by index: `for...in` would also visit any non-index enumerable
  // property on the array and write it out as if it were a message id.
  for (var i = 0; i < messageIds.length; i++) {
    msgIdFile.write(messageIds[i] + '\n');
  }
  msgIdFile.end();
}
/**
 * Starts scraping every index page listed in the global `pages`.
 *
 * Reports how many pages were found on stdout, then requests the message ids
 * of each page, passing `handlePageScraped` as the per-page completion
 * callback.
 */
function handleScrapePages() {
  process.stdout.write('found ' + pages.length + ' pages containing message lists\n');

  // Iterate by index: `for...in` would also visit any non-index enumerable
  // property on the array and try to fetch it as a page URL.
  for (var i = 0; i < pages.length; i++) {
    handleGetMessageIdsFromPage(pages[i], handlePageScraped);
  }
}
// Entry point: collect the list of index pages, then scrape message ids from each of them.
handleGetPages(handleScrapePages);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment