bpedro/apiDocsCompare.js

## apiDocsCompare.js
var importio = require('import-io').client,
    fs = require('fs'),
    sha1 = require('sha1'),
    http = require('http-get');

// Crawl state: number of the index page currently being requested.
var idx = 1;

// GUIDs of the two import.io crawlers queried by this script:
// one is fed index pages, the other individual pages.
var indexGuid = 'AN_INDEX_CRAWLER_GUID',
    pageGuid = 'A_PAGE_CRAWLER_GUID';

// import.io credentials (placeholders to be filled in) and the
// client object built from them.
var userGuid = 'YOUR_USER_GUID',
    apiKey = 'YOUR_API_KEY';
var io = new importio(userGuid, apiKey, 'query.import.io');

/**
 * Index callback function.
 *
 * Builds a fresh message handler for one index-crawler query. The
 * handler accumulates the results carried by 'MESSAGE' messages and,
 * once the query is reported finished:
 *   - queries the page crawler once per collected result, and
 *   - increments the page number and queries the next index page.
 *
 * When a finished index query produced no results at all, the crawl
 * stops instead of requesting further index pages; otherwise the
 * script would keep incrementing the page number forever.
 *
 * @returns {function(boolean, Object)} handler taking (finished, msg).
 */
var indexCb = function() {
  var results = [];
  return function(finished, msg) {
    // Only keep payloads that really carry a results array; anything
    // else would put `undefined` entries in the list.
    if (msg.type === 'MESSAGE' &&
        msg.hasOwnProperty('data') &&
        msg.data && Array.isArray(msg.data.results)) {
      results = results.concat(msg.data.results);
    }
    if (!finished) {
      return;
    }

    // Nothing on this index page: end of the crawl.
    if (results.length === 0) {
      return;
    }

    // Trigger the page crawler for every entry (last to first).
    for (var i = results.length - 1; i >= 0; i--) {
      io.query({
        'connectorGuids': [pageGuid],
        'input': {
          'webpage/url': 'http://publicapis.com' +
                         results[i]['url/_source']
        }
      }, pageCb());
    }

    // increment page number and re-crawl the index
    idx++;
    io.query({
      'connectorGuids': [indexGuid],
      'input': {
        'webpage/url': 'http://publicapis.com/apis/' +
                       idx
      }
    }, indexCb());
  };
};

/**
 * Page callback function.
 *
 * Builds a fresh message handler for one page-crawler query. The
 * handler accumulates the results carried by 'MESSAGE' messages and,
 * once the query is reported finished, passes the `url` of the first
 * collected result to hashAndCompare.
 *
 * A query that finishes without any usable result is skipped rather
 * than crashing the whole crawl.
 *
 * @returns {function(boolean, Object)} handler taking (finished, msg).
 */
var pageCb = function() {
  var results = [];
  return function(finished, msg) {
    if (msg.type === 'MESSAGE' &&
        msg.hasOwnProperty('data') &&
        msg.data && Array.isArray(msg.data.results)) {
      results = results.concat(msg.data.results);
    }
    if (!finished) {
      return;
    }

    // The query may finish without delivering a result (or with one
    // lacking a url); there is nothing to hash in that case.
    var first = results[0];
    if (!first || !first.url) {
      return;
    }
    hashAndCompare(first.url);
  };
};

/**
 * Hash and compare URL contents with a
 * previous saved version.
 *
 * Fetches docUrl, hashes the response buffer with sha1 and compares
 * it with the hash stored in the file named sha1(docUrl):
 *   - no stored hash yet: the current hash is saved as the baseline
 *     and no change is reported;
 *   - stored hash differs: logs "<docUrl> changed!" and overwrites
 *     the stored hash;
 *   - stored hash equal: nothing happens.
 * A failed fetch is reported on stderr and otherwise ignored.
 *
 * @param {string} docUrl URL of the document to check.
 */
var hashAndCompare = function(docUrl) {
  http.get(docUrl, function(err, res) {
    if (err) {
      console.error('Unable to fetch ' + docUrl + ': ' + err);
      return;
    }

    var hashFile = sha1(docUrl);
    var current = sha1(res.buffer);

    // Read the stored hash as text so it can be compared strictly.
    // A missing file only means this URL has not been seen before.
    var previous = null;
    try {
      previous = fs.readFileSync(hashFile, 'utf8');
    } catch (readErr) {
      if (readErr.code !== 'ENOENT') {
        throw readErr;
      }
    }

    if (previous === current) {
      return;
    }
    if (previous !== null) {
      console.log(docUrl + ' changed!');
    }
    fs.writeFileSync(hashFile, current);
  });
};

// Open the import.io connection; once it is up, start the crawl by
// querying the index crawler for the first publicapis index page.
io.connect(function(isConnected) {
  if (isConnected) {
    var firstIndexQuery = {
      'connectorGuids': [indexGuid],
      'input': {
        'webpage/url': 'http://publicapis.com/apis/1'
      }
    };
    io.query(firstIndexQuery, indexCb());
  } else {
    // The connection attempt failed: report it and do nothing else.
    console.error('Unable to connect');
  }
});
	var importio = require('import-io').client,
	fs = require('fs'),
	sha1 = require('sha1'),
	http = require('http-get');

	// Crawl state: number of the index page currently being requested.
	var idx = 1;

	// GUIDs of the two import.io crawlers queried by this script:
	// one is fed index pages, the other individual pages.
	var indexGuid = 'AN_INDEX_CRAWLER_GUID',
	    pageGuid = 'A_PAGE_CRAWLER_GUID';

	// import.io credentials (placeholders to be filled in) and the
	// client object built from them.
	var userGuid = 'YOUR_USER_GUID',
	    apiKey = 'YOUR_API_KEY';
	var io = new importio(userGuid, apiKey, 'query.import.io');

	/**
	 * Index callback function.
	 *
	 * Builds a fresh message handler for one index-crawler query. The
	 * handler accumulates the results carried by 'MESSAGE' messages and,
	 * once the query is reported finished:
	 *   - queries the page crawler once per collected result, and
	 *   - increments the page number and queries the next index page.
	 *
	 * When a finished index query produced no results at all, the crawl
	 * stops instead of requesting further index pages; otherwise the
	 * script would keep incrementing the page number forever.
	 *
	 * @returns {function(boolean, Object)} handler taking (finished, msg).
	 */
	var indexCb = function() {
	  var results = [];
	  return function(finished, msg) {
	    // Only keep payloads that really carry a results array; anything
	    // else would put `undefined` entries in the list.
	    if (msg.type === 'MESSAGE' &&
	        msg.hasOwnProperty('data') &&
	        msg.data && Array.isArray(msg.data.results)) {
	      results = results.concat(msg.data.results);
	    }
	    if (!finished) {
	      return;
	    }

	    // Nothing on this index page: end of the crawl.
	    if (results.length === 0) {
	      return;
	    }

	    // Trigger the page crawler for every entry (last to first).
	    for (var i = results.length - 1; i >= 0; i--) {
	      io.query({
	        'connectorGuids': [pageGuid],
	        'input': {
	          'webpage/url': 'http://publicapis.com' +
	                         results[i]['url/_source']
	        }
	      }, pageCb());
	    }

	    // increment page number and re-crawl the index
	    idx++;
	    io.query({
	      'connectorGuids': [indexGuid],
	      'input': {
	        'webpage/url': 'http://publicapis.com/apis/' +
	                       idx
	      }
	    }, indexCb());
	  };
	};

	/**
	 * Page callback function.
	 *
	 * Builds a fresh message handler for one page-crawler query. The
	 * handler accumulates the results carried by 'MESSAGE' messages and,
	 * once the query is reported finished, passes the `url` of the first
	 * collected result to hashAndCompare.
	 *
	 * A query that finishes without any usable result is skipped rather
	 * than crashing the whole crawl.
	 *
	 * @returns {function(boolean, Object)} handler taking (finished, msg).
	 */
	var pageCb = function() {
	  var results = [];
	  return function(finished, msg) {
	    if (msg.type === 'MESSAGE' &&
	        msg.hasOwnProperty('data') &&
	        msg.data && Array.isArray(msg.data.results)) {
	      results = results.concat(msg.data.results);
	    }
	    if (!finished) {
	      return;
	    }

	    // The query may finish without delivering a result (or with one
	    // lacking a url); there is nothing to hash in that case.
	    var first = results[0];
	    if (!first || !first.url) {
	      return;
	    }
	    hashAndCompare(first.url);
	  };
	};

	/**
	 * Hash and compare URL contents with a
	 * previous saved version.
	 *
	 * Fetches docUrl, hashes the response buffer with sha1 and compares
	 * it with the hash stored in the file named sha1(docUrl):
	 *   - no stored hash yet: the current hash is saved as the baseline
	 *     and no change is reported;
	 *   - stored hash differs: logs "<docUrl> changed!" and overwrites
	 *     the stored hash;
	 *   - stored hash equal: nothing happens.
	 * A failed fetch is reported on stderr and otherwise ignored.
	 *
	 * @param {string} docUrl URL of the document to check.
	 */
	var hashAndCompare = function(docUrl) {
	  http.get(docUrl, function(err, res) {
	    if (err) {
	      console.error('Unable to fetch ' + docUrl + ': ' + err);
	      return;
	    }

	    var hashFile = sha1(docUrl);
	    var current = sha1(res.buffer);

	    // Read the stored hash as text so it can be compared strictly.
	    // A missing file only means this URL has not been seen before.
	    var previous = null;
	    try {
	      previous = fs.readFileSync(hashFile, 'utf8');
	    } catch (readErr) {
	      if (readErr.code !== 'ENOENT') {
	        throw readErr;
	      }
	    }

	    if (previous === current) {
	      return;
	    }
	    if (previous !== null) {
	      console.log(docUrl + ' changed!');
	    }
	    fs.writeFileSync(hashFile, current);
	  });
	};

	// Open the import.io connection; once it is up, start the crawl by
	// querying the index crawler for the first publicapis index page.
	io.connect(function(isConnected) {
	  if (isConnected) {
	    var firstIndexQuery = {
	      'connectorGuids': [indexGuid],
	      'input': {
	        'webpage/url': 'http://publicapis.com/apis/1'
	      }
	    };
	    io.query(firstIndexQuery, indexCb());
	  } else {
	    // The connection attempt failed: report it and do nothing else.
	    console.error('Unable to connect');
	  }
	});