Skip to content

Instantly share code, notes, and snippets.

@ericabouaf
Created May 4, 2009 00:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericabouaf/106224 to your computer and use it in GitHub Desktop.
Save ericabouaf/106224 to your computer and use it in GitHub Desktop.
// Describes the paging scheme of the directory and the XPath used to extract the data.
var baseUrl = "http://www.abcvert.fr/annuaire/energies-renouvelables/solaire";
var xpath = '//table[@class="sobi2Listing"]/tr//td';

/**
 * Returns the URL of one listing page.
 *
 * Index 0 maps to "<baseUrl>.html"; any other index maps to
 * "<baseUrl>/Page-<pageIndex + 1>.html".
 *
 * @param {number} pageIndex - Zero-based page index.
 * @returns {string} URL of the requested page.
 */
var getUrl = function(pageIndex) {
  // Loose comparison kept on purpose so the accepted inputs stay the same.
  if (pageIndex == 0) {
    return baseUrl + ".html";
  }
  var pageNumber = pageIndex + 1;
  return baseUrl + "/Page-" + pageNumber + ".html";
};
// Collected contacts, keyed by contact name (a later contact with the same
// name overwrites an earlier one).
var contacts = {};
// First page index and number of pages to scrape (can go up to 35).
var pageStart = 0;
var nToScrap = 5;

/**
 * Builds a contact record from one listing cell.
 *
 * @param {Object} cell - One "td" entry of the converted query result.
 * @returns {Object|null} Record with "name" and "url", plus "adress",
 *   "zipcode" and "city" when the matching span is present; null when the
 *   cell has no link inside its first paragraph (such cells are skipped).
 */
function parseContact(cell) {
  var paragraphs = cell && cell.p;
  var link = paragraphs && paragraphs["0"] && paragraphs["0"].a;
  if (!link) {
    return null;
  }
  var item = {};
  item.name = link.title;
  item.url = link.href;
  var span = cell.span;
  if (span) {
    // "adress" (sic) is an output key and is kept unchanged for existing consumers.
    if (span["0"]) {
      item.adress = span["0"].content;
    }
    if (span["1"]) {
      item.zipcode = span["1"].content;
    }
    if (span["2"]) {
      item.city = span["2"].content;
    }
  }
  return item;
}

// Iterate over nToScrap pages starting at pageStart. nToScrap is a page
// count, so the end bound must be offset by pageStart.
var pageEnd = pageStart + nToScrap;
for (var i = pageStart; i < pageEnd; i++) {
  // The YQL query that fetches the page and applies the XPath.
  var url = getUrl(i);
  var q = "select * from html where url='" + url + "' and xpath='" + xpath + "'";
  var query = y.query(q);
  // Convert the result to a JavaScript object (it is XML originally).
  var json = y.xmlToJson(query.results);
  var list = (json && json.results) ? json.results.td : null;
  if (!list) {
    // Nothing matched on this page; keep going with the next one.
    continue;
  }
  // NOTE(review): a lone match presumably comes back as a single object
  // rather than an array - TODO confirm against the YQL conversion rules.
  if (typeof list.length !== "number") {
    list = [list];
  }
  // Iterate over the contacts of the page.
  for (var c = 0; c < list.length; c++) {
    var item = parseContact(list[c]);
    if (item) {
      contacts[item.name] = item;
    }
  }
}
// Response object: a version marker plus the collected contacts.
var responsePayload = {};
// 0.10 is a numeric literal, so the stored value is the number 0.1.
responsePayload.version = 0.10;
responsePayload.contacts = contacts;
response.object = responsePayload;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment