Skip to content

Instantly share code, notes, and snippets.

@dev4dev
Created March 9, 2012 21:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dev4dev/2008684 to your computer and use it in GitHub Desktop.
Save dev4dev/2008684 to your computer and use it in GitHub Desktop.
Parse Google search results with Node.js
// Dependencies and the shared DOM window used by the scraper below.
// 'util' and 'url' are Node core modules; 'http-get', 'html5' and 'jsdom'
// are loaded by package name and are presumably third-party npm packages.
var http = require('http-get'),
HTML5 = require('html5'),
// NOTE(review): process.binding is an internal Node API and `Script` is not
// referenced anywhere in the visible code — confirm it is still needed.
Script = process.binding('evals').Script,
// NOTE(review): `util` is not referenced anywhere in the visible code.
util = require('util'),
jsdom = require('jsdom'),
// Window created through jsdom with HTML5 passed as the parser option; its
// document is what the HTML5 parser is given to populate.
window = jsdom.jsdom(null, null, {parser: HTML5}).createWindow(),
// Used to pull the `q` query parameter out of each result link.
url = require('url');
// HTML5 parser bound to the jsdom window's document, so parsed markup
// becomes reachable through `parser.document`.
var parserSettings = {};
parserSettings.document = window.document;
var parser = new HTML5.Parser(parserSettings);

// Request descriptor passed to http.get: the search page to fetch.
var options = {};
options.url = 'https://www.google.com/search?num=10&start=0&hl=en&safe=off&q=nodejs+parser';
/**
 * Fetch the search page described by `options`, parse it into the shared
 * document, and print the unique link targets found inside the element
 * with id "ires".
 *
 * For every anchor in that element, the `q` query parameter of its href is
 * taken as the target. Targets are skipped when they are empty, start with
 * "related" (case-insensitive), or were already collected.
 *
 * On a request error, or when the "ires" element is missing, the problem is
 * reported on stderr and nothing is printed to stdout.
 */
http.get(options, function (error, result) {
  if (error) {
    console.error(error);
    return;
  }

  parser.parse(result.buffer);

  var ires = parser.document.getElementById('ires');
  if (!ires) {
    // The page layout is not guaranteed; avoid a TypeError on a null node.
    console.error('Element with id "ires" not found in the response');
    return;
  }

  var anchors = ires.getElementsByTagName('a');
  var relatedPrefix = /^related/i;
  var out = [];

  for (var i = 0; i < anchors.length; i++) {
    // Declared per iteration so a failed parse can never reuse the value
    // extracted from the previous anchor.
    var href = '';
    try {
      href = url.parse(anchors[i].href, true).query.q || '';
    } catch (e) {
      // An anchor whose href cannot be parsed carries no usable target.
      continue;
    }

    if (href && !relatedPrefix.test(href) && out.indexOf(href) === -1) {
      out.push(href);
    }
  }

  console.log(out);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment