Skip to content

Instantly share code, notes, and snippets.

@tommelo
Created September 10, 2017 22:47
Show Gist options
  • Save tommelo/39980388ca9388e8ded1892dc44f0304 to your computer and use it in GitHub Desktop.
Save tommelo/39980388ca9388e8ded1892dc44f0304 to your computer and use it in GitHub Desktop.
A Simple Google Scraper
'use strict';
var url = require('url');
var util = require('util');
var Scraper = require('./scraper');
/**
* The default configuration
*/
var HOST = 'https://google.com'; // base URL of the Google host
var PATH = '/search'; // path of the search endpoint, appended to HOST
var LIMIT = 10; // default result limit; also the step between "start" offsets in search()
var START_AT_PAGE = 0; // first "start" offset requested by search()
var URL_ELEMENT = '.g h3 a'; // selector used by scrape() to locate the result anchors
/**
* Creates a Google scraper.
*
* The only work done here is running the Scraper constructor against the
* new instance (with no arguments), so whatever state Scraper sets up is
* present on every Google object.
*/
function Google() {
  Scraper.apply(this);
}
/**
* The implementation of the abstract method transform()
* Flattens the given multi array by one level into a single array.
*
* Array entries are expanded one level; any non-array entry is kept as a
* single element, and a null/undefined result yields an empty array.
* The flattening is done iteratively instead of spreading every entry as a
* call argument through Function.prototype.apply, so the number of entries
* is not capped by the engine's argument/stack limit.
*
* @param {Array} result The multi array
* @return {Array} single The single array
*/
Google.prototype.transform = function(result) {
  var single = [];
  if (result == null) {
    return single;
  }
  for (var i = 0; i < result.length; i++) {
    var entry = result[i];
    if (Array.isArray(entry)) {
      // Expand exactly one level; nested arrays inside entry stay nested.
      for (var j = 0; j < entry.length; j++) {
        single.push(entry[j]);
      }
    } else {
      single.push(entry);
    }
  }
  return single;
};
/**
* The implementation of the abstract method scrape()
* Collects the result links from a Google search page.
*
* Every element matching URL_ELEMENT is inspected: its href is parsed and
* the value of the "q" query parameter is collected. Elements without an
* href, or whose href carries no "q" parameter, are skipped, so the
* returned list never contains undefined entries and a bare anchor cannot
* make the parse step throw.
*
* @param {Function} $ The cheerio instance
* @return {Array} links The Google's search links
*/
Google.prototype.scrape = function($) {
  var links = [];
  $(URL_ELEMENT).each(function(i, element) {
    var href = $(element).attr('href');
    // The attribute may be missing, and url.parse() throws a TypeError
    // when handed anything other than a string.
    if (typeof href !== 'string') {
      return;
    }
    var target = url.parse(href, true).query.q;
    if (target !== undefined) {
      links.push(target);
    }
  });
  return links;
};
/**
* Performs a Google search
*
* One request is prepared per result page: the "start" offset begins at
* START_AT_PAGE and advances by LIMIT for as long as it stays below the
* requested limit (LIMIT is used when no limit is given). All prepared
* requests are then handed to execute() in one call.
*
* @param {String} term The term to search
* @param {Number} limit The result limit
* @return {Promise} promise A promise of the execution chain
*/
Google.prototype.search = function(term, limit) {
  var endpoint = HOST + PATH;
  var wanted = limit || LIMIT;
  var requests = [];
  var offset = START_AT_PAGE;
  while (offset < wanted) {
    requests.push(this.prepare(endpoint, { q: term, start: offset }));
    offset += LIMIT;
  }
  return this.execute(requests);
};
// Make Google inherit from Scraper by wiring up the prototype chain.
// NOTE(review): this runs after the prototype methods above were assigned,
// which relies on util.inherits keeping the existing Google.prototype object
// rather than replacing it — confirm for the Node versions this must support.
util.inherits(Google, Scraper);
// Export the constructor as the module's public interface.
module.exports = Google;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment