Skip to content

Instantly share code, notes, and snippets.

@ricardoalcocer
Created November 18, 2012 21:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ricardoalcocer/4107569 to your computer and use it in GitHub Desktop.
Save ricardoalcocer/4107569 to your computer and use it in GitHub Desktop.
ClasificadosOnline Scraping Exercise
// Script: print every category offered by the ClasificadosOnline mobile
// miscellaneous-search form, one "<value> - <label>" line per category.
var http = require('http');

// Request sent to Yahoo's public YQL endpoint. The encoded query is:
//   select * from html
//   where url="http://clasificadosonline.com/m/MiscellaneosSearchM.asp"
//   and xpath='/html/body/div/div/div/form/select'
// i.e. YQL fetches the page and returns the form's <select> element as JSON.
var options = {
  host: 'query.yahooapis.com',
  port: 80,
  path: '/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D%22http%3A%2F%2Fclasificadosonline.com%2Fm%2FMiscellaneosSearchM.asp%22%20and%0A%20%20%20%20%20%20xpath%3D%27%2Fhtml%2Fbody%2Fdiv%2Fdiv%2Fdiv%2Fform%2Fselect%27&format=json&diagnostics=true&callback=',
  method: 'GET'
};

/**
 * Pulls the category entries out of a parsed response body.
 *
 * Reads `data.query.results.select.option` and always returns an array, so
 * callers can iterate without checking the shape first.
 *
 * @param {Object} data - Parsed JSON response.
 * @returns {Array} The option entries; empty when any level of the path is
 *   missing or null, and a one-element array when `option` is a bare object.
 */
function extractCategoryOptions(data) {
  var query = data && data.query;
  var results = query && query.results;
  var select = results && results.select;
  var option = select && select.option;

  if (option === undefined || option === null) {
    return [];
  }
  // A lone <option> may arrive as a plain object instead of a one-element
  // array; wrap it so it is not silently dropped.
  return Array.isArray(option) ? option : [option];
}

/**
 * Builds the line printed for one category.
 *
 * @param {Object} option - Entry with `value` and `content` properties.
 * @returns {string} "<value> - <content>".
 */
function formatCategoryOption(option) {
  return option.value + ' - ' + option.content;
}

var req = http.get(options, function (httpObj) {
  var pageData = '';
  httpObj.setEncoding('utf8');

  httpObj.on('data', function (chunk) {
    pageData += chunk;
  });

  httpObj.on('end', function () {
    var data;

    if (httpObj.statusCode < 200 || httpObj.statusCode >= 300) {
      console.error('Category request failed with HTTP status ' + httpObj.statusCode);
      return;
    }

    // The body comes from a remote service; never assume it is valid JSON.
    try {
      data = JSON.parse(pageData);
    } catch (err) {
      console.error('Could not parse category response as JSON: ' + err.message);
      return;
    }

    extractCategoryOptions(data).forEach(function (option) {
      console.log(formatCategoryOption(option));
    });
  });
});

// Without this listener a DNS or socket failure is thrown as an unhandled
// 'error' event and kills the process.
req.on('error', function (err) {
  console.error('Category request failed: ' + err.message);
});
// Script: print name, URL, location and price for one page of
// ClasificadosOnline miscellaneous listings in a given category.
var http = require('http');

// Paging offset and category id interpolated into the scraped URL below.
var offset = 0;
var category = 36;

// Request sent to Yahoo's public YQL endpoint. The encoded query asks YQL to
// fetch http://clasificadosonline.com/m/MiscellaneosListingM.asp with
// MisCat=<category> and offset=<offset>, and to return the element matched by
// xpath '/html/body/div/div/div/form/table' as JSON.
var options = {
  host: 'query.yahooapis.com',
  port: 80,
  path: '/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D%22http%3A%2F%2Fclasificadosonline.com%2Fm%2FMiscellaneosListingM.asp%3FMisCat%3D' + category + '%26Submit2%3DSearch%2B-%2BBusqueda%26keyword%3D%26Desc%3D%26offset%3D' + offset + '%22%20and%0A%20%20%20%20%20%20xpath%3D%27%2Fhtml%2Fbody%2Fdiv%2Fdiv%2Fdiv%2Fform%2Ftable%27&format=json&diagnostics=true&callback=',
  method: 'GET'
};

/**
 * Normalizes a value that may be missing, a single item, or an array.
 *
 * @param {*} value - Value to normalize.
 * @returns {Array} `[]` for null/undefined, the array itself when already an
 *   array, otherwise a one-element array holding `value`.
 */
function asList(value) {
  if (value === undefined || value === null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}

/**
 * Converts a parsed response body into plain listing records.
 *
 * Reads the rows at `data.query.results.table.tr`. For each row it looks at
 * the first cell's anchor (`td[0].a`) and takes the name from `a.strong`, the
 * URL from `a.href`, and location/price from the `content` of the first and
 * second `a.span` entries. Rows whose first cell has no anchor are skipped
 * instead of throwing.
 *
 * @param {Object} data - Parsed JSON response.
 * @returns {Array<{itemName: *, url: *, location: *, price: *}>} One record
 *   per row that has a link; a field is undefined when its source is absent.
 */
function extractListings(data) {
  var query = data && data.query;
  var results = query && query.results;
  var table = results && results.table;
  var rows = asList(table && table.tr);
  var listings = [];

  rows.forEach(function (row) {
    var firstCell = asList(row && row.td)[0];
    var link = firstCell && firstCell.a;
    var spans;

    if (!link) {
      // Presumably a header, pager or spacer row; nothing to report.
      return;
    }

    spans = asList(link.span);
    listings.push({
      itemName: link.strong,
      url: link.href,
      location: spans[0] ? spans[0].content : undefined,
      price: spans[1] ? spans[1].content : undefined
    });
  });

  return listings;
}

var req = http.get(options, function (httpObj) {
  var pageData = '';
  httpObj.setEncoding('utf8');

  httpObj.on('data', function (chunk) {
    pageData += chunk;
  });

  httpObj.on('end', function () {
    var data;

    if (httpObj.statusCode < 200 || httpObj.statusCode >= 300) {
      console.error('Listing request failed with HTTP status ' + httpObj.statusCode);
      return;
    }

    // The body comes from a remote service; never assume it is valid JSON.
    try {
      data = JSON.parse(pageData);
    } catch (err) {
      console.error('Could not parse listing response as JSON: ' + err.message);
      return;
    }

    extractListings(data).forEach(function (listing) {
      console.log(listing.itemName);
      console.log(listing.url);
      console.log(listing.location);
      console.log(listing.price);
      // Separator between listings (newline plus console.log's own newline).
      console.log('\n');
    });
  });
});

// Without this listener a DNS or socket failure is thrown as an unhandled
// 'error' event and kills the process.
req.on('error', function (err) {
  console.error('Listing request failed: ' + err.message);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment