-
-
Save artjomb/916eda32f0e16f987a00 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://www.houzz.com/pro/andersondesignstudio/anderson-design-studio | |
A | |
B | |
B1 | |
B2 | |
B3 | |
B4 | |
C | |
D | |
9 | |
Inside function that extracts data | |
http://www.houzz.com/pro/norrisarchitecture/norris-architecture | |
A | |
B | |
B1 | |
B2 | |
B3 | |
B4 | |
C | |
D | |
10 | |
Inside function that extracts data | |
http://www.houzz.com/pro/ameliedegaulle/amelie-de-gaulle-interiors | |
A | |
B | |
B1 | |
B2 | |
B3 | |
B4 | |
C | |
D | |
11 | |
Inside function that extracts data | |
http://www.houzz.com/pro/crowellinteriors/crowell-co-interiors | |
A | |
B | |
B1 | |
B2 | |
B3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scraper configuration and shared state for the Houzz professionals crawl.
// Every name is declared with `var` so nothing leaks out as an implicit global.
var casper = require('casper').create();
// Number of listing pages processPage() schedules steps for.
var page2 = 2;
// Collects one record per scraped profile page (see the extraction step below).
var jsonObj = { data : [] };
// Listing page the crawl starts from.
var url = 'http://www.houzz.com/professionals/c/Nashville--TN/p/15';
var webPage = require('webpage');
// Buffers intended for the (currently disabled) Zapier upload.
var zapTitle = [];
var zapContact = [];
var zapServices = [];
// NOTE(review): this standalone PhantomJS page is not used by any code visible
// in this file; confirm before removing it.
var page = webPage.create();
// Selector of the pagination "next" link on a listing page.
var nextBtn = "a.navigation-button.next";
// Profile URLs gathered from the listing pages.
var allLinks = [];
// Diagnostic hooks: each handler only echoes what happened so failures inside
// the remote page are visible on the console running the script.

// http://docs.casperjs.org/en/latest/events-filters.html#remote-message
function reportRemoteMessage(msg) {
    this.echo("Console: " + msg);
}

// http://docs.casperjs.org/en/latest/events-filters.html#page-error
// The stack trace argument is accepted but not printed.
function reportPageError(msg, trace) {
    this.echo("Error: " + msg);
}

// http://docs.casperjs.org/en/latest/events-filters.html#resource-error
function reportResourceError(resourceError) {
    var details = JSON.stringify(resourceError, undefined, 4);
    this.echo("ResourceError: " + details);
}

// http://docs.casperjs.org/en/latest/events-filters.html#page-initialized
// CasperJS doesn't provide `onResourceTimeout`, so it is attached directly to
// the underlying PhantomJS page, which is only available once it is initialized.
function attachResourceTimeoutLogger(page) {
    page.onResourceTimeout = function(request) {
        var serialized = JSON.stringify(request);
        console.log('Response Timeout (#' + request.id + '): ' + serialized);
    };
}

casper.on("remote.message", reportRemoteMessage);
casper.on("page.error", reportPageError);
casper.on("resource.error", reportResourceError);
casper.on("page.initialized", attachResourceTimeoutLogger);

// Open the first listing page and start collecting links once the pagination
// button is present.
casper.start(url);
casper.waitForSelector(nextBtn, processPage);
/**
 * Schedules one CasperJS step per listing page (1..page2). Each step logs its
 * page number, appends the profile links found on the current page to
 * `allLinks`, and clicks the "next" button when one exists.
 *
 * Must be called with the casper instance as `this` (it is used as the
 * `waitForSelector` callback).
 */
function processPage() {
    var self = this;

    // A separate function call per page gives every scheduled step its own
    // `pageNumber`. Closing over the loop counter directly would make all
    // steps log the final counter value, because the steps run after the loop
    // has already finished.
    function schedulePage(pageNumber) {
        self.then(function() {
            console.log(pageNumber);
            var pageData = this.evaluate(getPageData);
            // evaluate() can yield null if the page-side function fails;
            // do not let that end up as a link to open.
            if (pageData) {
                allLinks = allLinks.concat(pageData);
            }
            console.log(allLinks);
            if (!this.exists(nextBtn)) {
                return;
            }
            this.thenClick(nextBtn).then(function() {
                this.echo(this.getCurrentUrl());
            });
        });
    }

    for (var i = 1; i <= page2; i = i + 1) {
        schedulePage(i);
    }
}
/**
 * Collects the `href` of every element with class `pro-title` in the current
 * document. Passed to `casper.evaluate`, so it must only rely on names that
 * exist inside the page (no references to script-level variables).
 *
 * @returns {string[]} the href values in document order; elements without an
 *     href are skipped so callers never receive a null link to open.
 */
function getPageData() {
    var anchors = document.getElementsByClassName('pro-title');
    var hrefs = [];
    for (var idx = 0; idx < anchors.length; idx += 1) {
        var href = anchors[idx].getAttribute('href');
        if (href) {
            hrefs.push(href);
        }
    }
    return hrefs;
}
// Visit every collected profile link and push one record per profile into
// jsonObj.data. The single-letter console.log calls are progress markers that
// show how far extraction got on each page.
casper.then(function(){
    this.each(allLinks, function(self, link){
        console.log("Inside the each function");
        console.log(link);
        this.thenOpen(link, function(){
            "use strict";
            var profilePage = this;

            // getHTML() throws when the selector matches nothing, which aborts
            // the whole step for profiles that lack a field. Return an empty
            // string for a missing element instead.
            function htmlOrEmpty(selector) {
                return profilePage.exists(selector) ? profilePage.getHTML(selector) : "";
            }

            console.log("Inside function that extracts data");
            console.log(link);
            var description = this.fetchText('div.profile-about div:nth-child(1)');
            description = description.replace(/[\t\n]/g, "");
            console.log("A");
            var name = htmlOrEmpty('div.pro-info-horizontal-list div.info-list-label:nth-child(2) div.info-list-text');
            // Strip the literal "<b>Contact</b>: " label. The pattern must not
            // be a character class, otherwise every one of those letters is
            // removed from the name itself.
            name = name.replace(/<b>Contact<\/b>:\s*/, "");
            console.log("B");
            var title = this.fetchText('a.profile-full-name');
            console.log("B1");
            // NOTE(review): this is the same selector as `description`; the
            // intended contact selector may be a different element - confirm.
            var contact = this.fetchText('div.profile-about div:nth-child(1)');
            console.log("B2");
            var services = htmlOrEmpty('div.info-list-text span:nth-child(2) span');
            console.log("B3");
            var location = htmlOrEmpty('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
            console.log("B4");
            var reviews = htmlOrEmpty('div.pro-rating a span.pro-review-string span');
            console.log("C");
            jsonObj.data.push({
                "title" : title,
                "contact" : contact,
                "services" : services,
                "name" : name,
                "location" : location,
                "description" : description,
                "reviews" : reviews
            });
            console.log("D");
        }).then(function() {
            // Running count of profiles scraped so far.
            console.log(jsonObj.data.length);
            // TODO: post the collected records to the Zapier webhook once
            // extraction is reliable for every profile.
        });
    });
});
casper.run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment