-
-
Save DenisMir/3f9c45f72d1e95479498 to your computer and use it in GitHub Desktop.
sample scraping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load the required modules and declare the module-level variables.
var express = require('express'),
    jjw = require('jjw'),
    app,
    GOOGLE_SEARCH_URI;

// Base URI of the Google search; the URL-encoded query string is appended to it.
GOOGLE_SEARCH_URI = "http://www.google.de/search?q=";

// Create the express (node) server.
app = express.createServer();

// Express setup: install the body decoder middleware (the /scrape route reads
// req.body), use "html" as the view engine without a layout, and register
// jqtpl as the handler for ".html" views.
app.use(express.bodyDecoder());
app.set("view engine", "html");
app.set("view options", {layout: false});
app.register(".html", require("jqtpl"));
app.configure(function(){
    // Static provider rooted at the "public" directory next to this file.
    app.use(express.staticProvider(__dirname + '/public'));
});
// Scraper route, registered for all HTTP methods.
// When the request body carries a query string `q`, the Google result page for
// that query is scraped and the "scrape" view is rendered with the scrape
// result as locals. Without a query the "scrape" view is rendered with no data.
app.all('/scrape', function(req, res, next){
    // A missing body and a body without `q` are both treated as "no query";
    // otherwise encodeURIComponent(undefined) would search for "undefined".
    var query = req.body && req.body.q,
        uri;

    if(!query){
        res.render("scrape");
        return;
    }

    uri = GOOGLE_SEARCH_URI + encodeURIComponent(query);
    console.log("scraping the uri: " + uri);

    // NOTE(review): jjw is assumed to load `uri`, run the `scrape` functions
    // against the page and call back with (err, result) - confirm against jjw.
    jjw(uri, scrape, function(err, scrapeRes) {
        // Throwing here would escape the request (the callback runs
        // asynchronously) and take the whole process down; hand the error to
        // the framework's error handling instead.
        if (err) {
            return next(err);
        }
        console.log("scraping done...");
        res.render("scrape", {locals: scrapeRes});
    });
});
// Scrape definitions handed to jjw by the /scrape route.
var scrape = {
    /**
     * Collects the search hits from a result page.
     *
     * @param {Function} $ jQuery-like selector function bound to the page.
     * @returns {Array} One `{text, link, desc}` object per `li.g` element
     *     that has a non-empty link href, link text and description;
     *     elements missing any of the three are skipped.
     */
    results: function($){
        var res = [];
        $("li.g").each(function(){
            // Wrap the current element once instead of once per lookup.
            var item = $(this),
                linkNode, descNode, link, text, desc;

            // Clean up the description: drop the "span.f" nodes before its
            // text is read.
            item.find("span.f").remove();

            linkNode = item.find("a.l");
            descNode = item.find("div.s");
            link = linkNode.attr("href");
            text = linkNode.text();
            desc = descNode.text();

            if(link && text && desc){
                res.push({text: text, link: link, desc: desc});
            }
        });
        return res;
    }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment