Skip to content

Instantly share code, notes, and snippets.

@thayton
Created March 1, 2019 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thayton/170ed94e21f4de5c6485751e4f0c4e6d to your computer and use it in GitHub Desktop.
Save thayton/170ed94e21f4de5c6485751e4f0c4e6d to your computer and use it in GitHub Desktop.
/*
* $ npm init -y
* $ npm install puppeteer --save
* $ node sheahomes.js
*/
const puppeteer = require('puppeteer');
// Page that main() loads first to collect the links to the individual lot pages.
const url = 'https://www.sheahomes.com/new-homes/colorado/denver-area/parker/stonewalk-at-stepping-stone/';
/**
 * Scrape a single lot page and return its headline details.
 *
 * Navigates `page` to `url`, waits for the `p.large` paragraph, reads the
 * first `application/ld+json` script on the page for the postal address, and
 * extracts the price and square footage from the paragraph text.
 *
 * @param {object} page - Puppeteer page; only `goto`, `waitForSelector` and
 *   `$eval` are used.
 * @param {string} url - Address of the lot page to load.
 * @returns {Promise<{blurb: string, price: (string|null), sqft: (string|null), address: string}>}
 *   `blurb` is the full paragraph text. `price` and `sqft` are the
 *   digit-and-comma strings captured from it, or `null` when the paragraph
 *   does not contain the expected wording. `address` is the street line
 *   followed by "locality, region, postalCode" on a second line.
 * @throws Propagates errors from navigation, from `$eval` when a selector
 *   matches nothing, from `JSON.parse` on malformed structured data, and a
 *   TypeError when the structured data has no `address` member.
 */
async function getLot(page, url) {
  await page.goto(url);
  // waitForSelector / $eval are used instead of waitFor / $x, which newer
  // Puppeteer releases no longer provide.
  await page.waitForSelector('p.large');

  const structuredData = await page.$eval(
    'script[type="application/ld+json"]',
    (script) => script.innerText
  );
  const { address } = JSON.parse(structuredData);

  const blurb = await page.$eval('p.large', (p) => p.innerText);

  // A blurb without a price or size must not abort the scrape, so a failed
  // match yields null instead of dereferencing a null match result.
  const firstGroup = (pattern) => {
    const match = pattern.exec(blurb);
    return match ? match[1] : null;
  };

  return {
    blurb,
    price: firstGroup(/Priced From \$([\d,]+)/),
    sqft: firstGroup(/([\d,]+) Sq\. Ft/),
    address:
      `${address.streetAddress}\n` +
      `${address.addressLocality}, ${address.addressRegion}, ${address.postalCode}`,
  };
}
/**
 * Collect every lot linked from the listing page and enrich each with the
 * details scraped from its own page.
 *
 * Launches a browser, opens the module-level `url`, takes the first link in
 * each `section#qmi-homes div.card-content` card, and then calls `getLot`
 * for each link, merging the result into the lot object.
 *
 * @returns {Promise<Array<object>>} One object per card, holding `url` plus
 *   whatever fields `getLot` returned for that URL.
 * @throws Propagates any navigation, selector or `getLot` error; the browser
 *   is closed before the error reaches the caller.
 */
async function main() {
  const cardSelector = 'section#qmi-homes div.card-content';
  const browser = await puppeteer.launch();

  // try/finally guarantees the browser process is shut down even when
  // navigation or one of the lot pages fails.
  try {
    const page = await browser.newPage();
    await page.goto(url);
    await page.waitForSelector(cardSelector);

    const lots = await page.$$eval(cardSelector, (cards) =>
      cards.map((card) => ({ url: card.querySelector('a').href }))
    );

    // The lots are visited one at a time because they all reuse the same page.
    for (const lot of lots) {
      Object.assign(lot, await getLot(page, lot.url));
    }
    return lots;
  } finally {
    await browser.close();
  }
}
// Run the scraper and print the collected lots as pretty-printed JSON.
// A failure is reported on stderr with a non-zero exit status rather than
// being left as an unhandled promise rejection.
main()
  .then((lots) => console.log(JSON.stringify(lots, null, 2)))
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment