Skip to content

Instantly share code, notes, and snippets.

@artjomb
Created February 19, 2016 17:00
Show Gist options
  • Save artjomb/916eda32f0e16f987a00 to your computer and use it in GitHub Desktop.
Save artjomb/916eda32f0e16f987a00 to your computer and use it in GitHub Desktop.
http://www.houzz.com/pro/andersondesignstudio/anderson-design-studio
A
B
B1
B2
B3
B4
C
D
9
Inside function that extracts data
http://www.houzz.com/pro/norrisarchitecture/norris-architecture
A
B
B1
B2
B3
B4
C
D
10
Inside function that extracts data
http://www.houzz.com/pro/ameliedegaulle/amelie-de-gaulle-interiors
A
B
B1
B2
B3
B4
C
D
11
Inside function that extracts data
http://www.houzz.com/pro/crowellinteriors/crowell-co-interiors
A
B
B1
B2
B3
// ---------------------------------------------------------------------------
// Script-wide state. Everything is declared with `var` so that nothing is
// created as an implicit global (the original assigned `jsonObj`, `zapTitle`,
// `zapContact` and `zapServices` without declaring them).
// ---------------------------------------------------------------------------
var casper = require('casper').create();

// Upper bound on the number of listing pages that processPage() walks through.
var page2 = 2;

// Accumulates one record per scraped profile page.
var jsonObj = { data : [] };

// Listing page the crawl starts from.
var url = 'http://www.houzz.com/professionals/c/Nashville--TN/p/15';

var webPage = require('webpage');

// Buffers referenced only by disabled (commented-out) code in this file.
var zapTitle = [];
var zapContact = [];
var zapServices = [];

// NOTE(review): this standalone PhantomJS page is not referenced by any of the
// visible code (the `page` parameter of the "page.initialized" handler shadows
// it). Kept so the script's start-up side effects are unchanged.
var page = webPage.create();

// Selector of the "next page" pagination link on the listing pages.
var nextBtn = "a.navigation-button.next";

// Profile URLs gathered from the listing pages.
var allLinks = [];
// Diagnostics: surface page-side console output, page errors, failed
// resources and resource timeouts on the terminal.

// http://docs.casperjs.org/en/latest/events-filters.html#remote-message
casper.on("remote.message", function onRemoteMessage(text) {
    var line = "Console: " + text;
    this.echo(line);
});

// http://docs.casperjs.org/en/latest/events-filters.html#page-error
// The second argument (the trace) is accepted but not printed; it could be
// formatted the way the PhantomJS equivalent does.
casper.on("page.error", function onPageError(message, stack) {
    var line = "Error: " + message;
    this.echo(line);
});

// http://docs.casperjs.org/en/latest/events-filters.html#resource-error
casper.on("resource.error", function onResourceError(failure) {
    var details = JSON.stringify(failure, undefined, 4);
    this.echo("ResourceError: " + details);
});

// http://docs.casperjs.org/en/latest/events-filters.html#page-initialized
// CasperJS doesn't provide `onResourceTimeout`, so the handler is attached
// directly to the underlying PhantomJS page object, which is only available
// once the page has been initialized.
casper.on("page.initialized", function onPageInitialized(webpage) {
    webpage.onResourceTimeout = function onResourceTimeout(req) {
        var prefix = 'Response Timeout (#' + req.id + '): ';
        console.log(prefix + JSON.stringify(req));
    };
});
// Open the start URL, then hand over to processPage once the pagination
// "next" link is present in the page.
casper
    .start(url)
    .waitForSelector(nextBtn, processPage);
/**
 * Queues one scraping step per listing page (up to `page2` pages).
 *
 * Must be invoked with `this` bound to the casper instance (it is used as the
 * `waitForSelector` callback). Each step appends the links returned by
 * `getPageData` to `allLinks` and, unless it is the last page, clicks the
 * "next" link so the following step sees the next listing page.
 *
 * Fixes over the previous version:
 *  - every step now logs its own page number (the callbacks used to share the
 *    loop variable and all printed `page2 + 1`);
 *  - "next" is no longer clicked after the final requested page;
 *  - once the "next" link is missing, the remaining queued steps are skipped
 *    instead of scraping the same page again and duplicating its links;
 *  - a null result from `evaluate` no longer adds a `null` entry to `allLinks`.
 */
function processPage() {
    var scraper = this;
    // Set when a page has no "next" link, so later queued steps become no-ops.
    var reachedLastPage = false;

    // A dedicated function call per page gives each step callback its own
    // `pageNumber` binding.
    function schedulePage(pageNumber) {
        scraper.then(function() {
            if (reachedLastPage) {
                return;
            }
            console.log(pageNumber);
            var pageData = this.evaluate(getPageData);
            allLinks = allLinks.concat(pageData || []);
            console.log(allLinks);
            if (pageNumber >= page2) {
                // Last requested page: nothing further to navigate to.
                return;
            }
            if (!this.exists(nextBtn)) {
                reachedLastPage = true;
                return;
            }
            // Steps queued from inside a running step are expected to run
            // before the steps that were already queued after it, so the next
            // scraping step runs after this click (see CasperJS `then` docs).
            this.thenClick(nextBtn).then(function() {
                this.echo(this.getCurrentUrl());
            });
        });
    }

    for (var i = 1; i <= page2; i = i + 1) {
        schedulePage(i);
    }
}
/**
 * Collects the `href` attribute of every element with class `pro-title` in
 * the current document.
 *
 * This function is handed to `casper.evaluate`, so it must stay
 * self-contained: it may only rely on page globals such as `document`.
 *
 * @returns {Array} one entry per matching element, in document order; an
 *     entry is whatever `getAttribute('href')` yields for that element.
 */
function getPageData(){
    var anchors = Array.prototype.slice.call(document.getElementsByClassName('pro-title'));
    var hrefs = [];
    for (var idx = 0; idx < anchors.length; idx += 1) {
        hrefs.push(anchors[idx].getAttribute('href'));
    }
    return hrefs;
}
/**
 * Returns the inner HTML of the first element matching `selector`, or null
 * when the current page has no such element.
 *
 * CasperJS's `getHTML` raises an error when the selector matches nothing
 * (see the CasperJS API docs); the unguarded call for the location field is
 * where earlier runs stopped. Checking `exists` first lets a profile with a
 * missing optional field still be recorded.
 *
 * @param {Object} scraper - the casper instance to query.
 * @param {string} selector - CSS selector of the wanted element.
 * @returns {?string} the element's HTML, or null if it is absent.
 */
function getHTMLOrNull(scraper, selector) {
    if (!scraper.exists(selector)) {
        return null;
    }
    return scraper.getHTML(selector);
}

// After the listing pages have been walked, open every collected profile link
// and push one record per profile onto `jsonObj.data`.
casper.then(function(){
    this.each(allLinks, function(self, link){
        console.log("Inside the each function");
        console.log(link);
        if (!link) {
            // An element without an href yields an empty entry; there is
            // nothing to open for it.
            console.log("Skipping empty link");
            return;
        }
        this.thenOpen(link, function(){
            "use strict";
            console.log("Inside function that extracts data");
            console.log(link);

            var description = this.fetchText('div.profile-about div:nth-child(1)');
            description = description.replace(/[\t\n]/g, "");
            console.log("A");

            var name = getHTMLOrNull(this, 'div.pro-info-horizontal-list div.info-list-label:nth-child(2) div.info-list-text');
            if (name !== null) {
                // Strip the literal "<b>Contact</b>: " label. The previous
                // pattern was a character class, which deleted every
                // occurrence of those individual characters from the name.
                name = name.replace(/<b>Contact<\/b>: /g, "");
            }
            console.log("B");

            var title = this.fetchText('a.profile-full-name');
            console.log("B1");

            // NOTE(review): this is the same selector as `description` (minus
            // the whitespace clean-up) — confirm whether a dedicated contact
            // selector was intended.
            var contact = this.fetchText('div.profile-about div:nth-child(1)');
            console.log("B2");

            var services = getHTMLOrNull(this, 'div.info-list-text span:nth-child(2) span');
            console.log("B3");

            var locationHtml = getHTMLOrNull(this, 'div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
            console.log("B4");

            var reviews = getHTMLOrNull(this, 'div.pro-rating a span.pro-review-string span');
            console.log("C");

            // Fields whose element is missing on the page are stored as null.
            jsonObj.data.push({
                "title" : title,
                "contact" : contact,
                "services" : services,
                "name" : name,
                "location" : locationHtml,
                "description" : description,
                "reviews" : reviews
            });
            console.log("D");
            // TODO: a disabled experiment that POSTed each record to a Zapier
            // webhook used to live here as commented-out code; reintroduce it
            // as real code if it is still wanted.
        }).then(function() {
            // Running count of the records collected so far.
            console.log(jsonObj.data.length);
        });
    });
});
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment