-
-
Save parkerproject/8639860 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dependencies: the import.io client constructor, Node's fs module,
// the sha1 hashing helper and the http-get client.
var importio = require('import-io').client,
    fs = require('fs'),
    sha1 = require('sha1'),
    http = require('http-get');

// importio configuration: replace the placeholders with real credentials.
var userGuid = 'YOUR_USER_GUID';
var apiKey = 'YOUR_API_KEY';
var io = new importio(userGuid,
                      apiKey,
                      'query.import.io');

// the page crawler guid (connector queried once per document link)
var pageGuid = 'A_PAGE_CRAWLER_GUID';

// the index crawler guid (connector queried once per index page)
var indexGuid = 'AN_INDEX_CRAWLER_GUID';

// the page index: number of the index page currently being crawled
var idx = 1;
/**
 * Index callback factory.
 *
 * Each call returns a fresh callback for ONE index-page query. That
 * callback collects the result rows delivered in 'MESSAGE' messages and,
 * once the query is reported finished, issues one page-crawler query per
 * row (last row first) and then queries the next index page.
 *
 * The crawl stops at the first index page that produced no rows;
 * otherwise every finished query would schedule another one forever.
 *
 * @returns {function(boolean, Object)} callback taking (finished, msg)
 */
var indexCb = function() {
  // Rows accumulated across all messages of this one query.
  var data = [];

  return function(finished, msg) {
    // Only messages that actually carry a results array contribute rows;
    // error or status messages are ignored instead of polluting `data`.
    if (msg && msg.type === 'MESSAGE' &&
        msg.data && Array.isArray(msg.data.results)) {
      data = data.concat(msg.data.results);
    }

    if (!finished) {
      return;
    }

    if (data.length === 0) {
      // Nothing came back for this index page: treat it as the end of
      // the index and do not schedule any further queries.
      return;
    }

    for (var i = data.length - 1; i >= 0; i--) {
      var source = data[i] && data[i]['url/_source'];
      if (!source) {
        // Row without a link: there is no page to crawl for it.
        continue;
      }
      io.query({
        'connectorGuids': [pageGuid],
        'input': {
          'webpage/url': 'http://publicapis.com' + source
        }
      }, pageCb());
    }

    // increment page number and re-crawl the index
    idx++;
    io.query({
      'connectorGuids': [indexGuid],
      'input': {
        'webpage/url': 'http://publicapis.com/apis/' + idx
      }
    }, indexCb());
  };
};
/**
 * Page callback factory.
 *
 * Each call returns a fresh callback for ONE page-crawler query. That
 * callback collects the result rows delivered in 'MESSAGE' messages and,
 * once the query is reported finished, passes the `url` of the first row
 * to hashAndCompare.
 *
 * A finished query that produced no usable row is reported on stderr
 * instead of throwing on `data[0].url`.
 *
 * @returns {function(boolean, Object)} callback taking (finished, msg)
 */
var pageCb = function() {
  // Rows accumulated across all messages of this one query.
  var data = [];

  return function(finished, msg) {
    if (msg && msg.type === 'MESSAGE' &&
        msg.data && Array.isArray(msg.data.results)) {
      data = data.concat(msg.data.results);
    }

    if (!finished) {
      return;
    }

    var first = data[0];
    if (!first || !first.url) {
      console.error('Page crawl finished without a document URL');
      return;
    }

    // Only the first row is used, as in the original implementation.
    hashAndCompare(first.url);
  };
};
/**
 * Hash and compare URL contents with a previously saved version.
 *
 * Fetches `docUrl`, hashes the response buffer with sha1 and compares it
 * with the hash stored in a file named sha1(docUrl) (relative to the
 * current working directory). When the hash differs, the new one is
 * written to that file.
 *
 * - First time a URL is seen (no hash file yet): the hash is stored
 *   silently; nothing is reported as changed.
 * - Hash differs from the stored one: logs "<docUrl> changed!".
 * - Fetch failure: reported on stderr, nothing is written.
 *
 * @param {string} docUrl URL of the document to check
 */
var hashAndCompare = function(docUrl) {
  http.get(docUrl, function(err, res) {
    if (err) {
      console.error('Unable to fetch ' + docUrl + ': ' + err);
      return;
    }

    var hashFile = sha1(docUrl);
    var current = sha1(res.buffer);

    // A missing hash file just means this URL has not been seen before;
    // any other read failure is a real error and is rethrown.
    var previous = null;
    try {
      // Read as text so the comparison below is string against string.
      previous = fs.readFileSync(hashFile, 'utf8');
    } catch (readErr) {
      if (readErr.code !== 'ENOENT') {
        throw readErr;
      }
    }

    if (previous === current) {
      return;
    }

    if (previous !== null) {
      console.log(docUrl + ' changed!');
    }
    fs.writeFileSync(hashFile, current);
  });
};
// Connect to the server and, once connected, start crawling the index.
io.connect(function(connected) {
  // Check connect succeeded on callback
  if (!connected) {
    console.error('Unable to connect');
    return;
  }

  // Query for the publicapis index. The first page number comes from
  // `idx` so that it always matches the counter indexCb increments.
  io.query({
    'connectorGuids': [indexGuid],
    'input': {
      'webpage/url': 'http://publicapis.com/apis/' + idx
    }
  }, indexCb());
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment