Last active
January 3, 2016 22:49
-
-
Save bpedro/8530629 to your computer and use it in GitHub Desktop.
Crawl the Public API Directory, grab API documentation URLs and compare each documentation page with a previously saved version. This script uses import·io. Please see http://support.import.io/knowledgebase/articles/258104-integrate-import-io-with-node-js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var importio = require('import-io').client, | |
fs = require('fs'), | |
sha1 = require('sha1'), | |
http = require('http-get'); | |
// import.io credentials (placeholder values)
var userGuid = 'YOUR_USER_GUID',
    apiKey = 'YOUR_API_KEY';

// import.io client built from the credentials above; the third
// argument is presumably the API host -- confirm against the client docs
var io = new importio(userGuid, apiKey, 'query.import.io');

// GUID of the crawler used for each individual page
var pageGuid = 'A_PAGE_CRAWLER_GUID';

// GUID of the crawler used for the index pages
var indexGuid = 'AN_INDEX_CRAWLER_GUID';

// number of the index page being crawled (incremented by indexCb)
var idx = 1;
/**
 * Index callback function.
 *
 * Returns a fresh callback for a single index-page query. The callback
 * collects the results carried by 'MESSAGE' messages and, once the
 * query has finished, triggers the page crawler for every collected
 * result and then queries the next index page.
 *
 * Crawling stops as soon as an index page yields no results; without
 * that check the index would be re-queried with an ever-growing page
 * number and the script would never terminate.
 *
 * @returns {function(boolean, Object)} callback taking (finished, msg)
 */
var indexCb = function() {
  var data = [];
  return function(finished, msg) {
    if (msg.type === 'MESSAGE' &&
        msg.hasOwnProperty('data')) {
      // a message without a results list contributes nothing
      data = data.concat(msg.data.results || []);
    }
    if (!finished) {
      return;
    }
    if (data.length === 0) {
      // empty index page: nothing left to crawl, do not recurse
      return;
    }
    // trigger the page crawler for each result (last to first)
    for (var i = data.length - 1; i >= 0; i--) {
      io.query({
        'connectorGuids': [pageGuid],
        'input': {
          'webpage/url': 'http://publicapis.com' +
            data[i]['url/_source']
        }
      }, pageCb());
    }
    // increment page number and re-crawl the index
    idx++;
    io.query({
      'connectorGuids': [indexGuid],
      'input': {
        'webpage/url': 'http://publicapis.com/apis/' +
          idx
      }
    }, indexCb());
  };
};
/**
 * Page callback function.
 *
 * Returns a fresh callback for a single page query. The callback
 * collects the results carried by 'MESSAGE' messages and, once the
 * query has finished, passes the `url` of the first result to
 * hashAndCompare.
 *
 * A finished query without a usable first result is reported and
 * skipped instead of throwing inside the callback.
 *
 * @returns {function(boolean, Object)} callback taking (finished, msg)
 */
var pageCb = function() {
  var data = [];
  return function(finished, msg) {
    if (msg.type === 'MESSAGE' &&
        msg.hasOwnProperty('data')) {
      // a message without a results list contributes nothing
      data = data.concat(msg.data.results || []);
    }
    if (!finished) {
      return;
    }
    if (data.length === 0 || !data[0] || !data[0].url) {
      console.error('Page query finished without a documentation URL');
      return;
    }
    hashAndCompare(data[0].url);
  };
};
/**
 * Hash and compare URL contents with a previously saved version.
 *
 * Fetches docUrl, hashes the response buffer with sha1 and compares
 * it with the hash stored in a file named sha1(docUrl). When the hash
 * differs, a "changed" message is logged and the stored hash is
 * replaced. When no hash has been stored yet (first run for this URL),
 * the hash is saved as the baseline without reporting a change.
 *
 * @param {string} docUrl - URL of the documentation page to check
 */
var hashAndCompare = function(docUrl) {
  http.get(docUrl, function(err, res) {
    if (err) {
      // report the failure instead of silently ignoring it
      console.error('Unable to fetch ' + docUrl + ': ' + err);
      return;
    }
    var hashFile = sha1(docUrl);
    var current = sha1(res.buffer);
    var previous = null;
    try {
      // read as text so the comparison below is string against string
      previous = fs.readFileSync(hashFile, 'utf8');
    } catch (readErr) {
      // a missing file only means this URL has never been hashed before
      if (readErr.code !== 'ENOENT') {
        throw readErr;
      }
    }
    if (previous === current) {
      return;
    }
    if (previous !== null) {
      console.log(docUrl + ' changed!');
    }
    fs.writeFileSync(hashFile, current);
  });
};
// Open the connection, then start crawling at the first index page
io.connect(function(isConnected) {
  if (isConnected) {
    // Initial query for the publicapis index
    var firstIndexQuery = {
      'connectorGuids': [indexGuid],
      'input': {
        'webpage/url': 'http://publicapis.com/apis/1'
      }
    };
    io.query(firstIndexQuery, indexCb());
  } else {
    // The connection attempt failed; nothing is crawled
    console.error('Unable to connect');
  }
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment