Skip to content

Instantly share code, notes, and snippets.

@Hyllesen
Created August 25, 2019 19:24
Show Gist options
  • Save Hyllesen/d831f6fd4fc28cb5362e6f178457db77 to your computer and use it in GitHub Desktop.
Save Hyllesen/d831f6fd4fc28cb5362e6f178457db77 to your computer and use it in GitHub Desktop.
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
// Presumably an example of the data one listing should yield; it is not
// referenced by the rest of the visible code.
const sample = {
  guests: 1,
  bedrooms: 1,
  beds: 1,
  baths: 1,
  pesosPerNight: 350,
};

// Shared Puppeteer browser handle; main() assigns it after launching.
let browser;
/**
 * Loads a search-results page and collects the listing URLs found on it.
 *
 * @param {string} url - Address of the search-results (index) page.
 * @param {object} page - Puppeteer page used for navigation and DOM access.
 * @returns {Promise<string[]>} Listing URLs of the form
 *   "https://airbnb.com/rooms...". The array is empty when nothing matched or
 *   when scraping failed, so callers can always iterate the result.
 */
async function scrapeHomesIndexPage(url, page) {
  try {
    await page.goto(url, { waitUntil: "networkidle2" });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so there is nothing to await here.
    const $ = cheerio.load(html);
    const homes = $("[itemprop='url']")
      .map((i, element) => {
        const content = $(element).attr("content");
        // The host part of the content attribute comes back as undefined/null,
        // so only the part after "rooms" is kept and the host is rebuilt.
        // Elements without a usable room path are skipped (cheerio's map drops
        // null results) instead of aborting the whole scrape.
        if (typeof content !== "string") {
          return null;
        }
        const roomPath = content.split("rooms")[1];
        if (!roomPath) {
          return null;
        }
        return `https://airbnb.com/rooms${roomPath}`;
      })
      .get();
    console.log(homes);
    return homes;
  } catch (err) {
    console.error("Error scraping homes page");
    console.error(err);
    // Best effort: report the failure but keep the return type stable.
    return [];
  }
}
/**
 * Opens a listing's description page, logs its nightly price text and
 * returns it.
 *
 * @param {string} url - Address of the listing page.
 * @param {object} page - Puppeteer page used for navigation and DOM access.
 * @returns {Promise<string|undefined>} The text of the price element (may be
 *   empty if the selector matches nothing), or undefined when scraping failed.
 */
async function scrapeDescriptionPage(url, page) {
  // Purely positional selector copied from the page structure; it is brittle
  // and will need updating whenever the listing page markup changes.
  const priceSelector =
    "#room > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > span > span";
  try {
    await page.goto(url, { waitUntil: "networkidle2" });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so there is nothing to await here.
    const $ = cheerio.load(html);
    const pricePerNight = $(priceSelector).text();
    console.log("price pr. night");
    console.log(pricePerNight);
    // Hand the value back so callers can use it, not just read it in the log.
    return pricePerNight;
  } catch (err) {
    console.error("error scraping description page");
    console.error(err);
    return undefined;
  }
}
/**
 * Launches a visible browser, scrapes the search-results page for listing
 * URLs, then visits each listing in turn to scrape its description page.
 * The browser is always closed afterwards, even when a step throws.
 *
 * @returns {Promise<void>}
 */
async function main() {
  browser = await puppeteer.launch({ headless: false });
  try {
    const homesIndexPage = await browser.newPage();
    //It's important to have a date selected to get prices in Airbnb
    const homes = await scrapeHomesIndexPage(
      "https://www.airbnb.com/s/Wellington--New-Zealand/homes?refinement_paths%5B%5D=%2Fhomes&current_tab_id=home_tab&selected_tab_id=home_tab&place_id=ChIJy3TpSfyxOG0RcLQTomPvAAo&search_type=filter_change&screen_size=large&checkin=2019-09-02&checkout=2019-09-06&s_tag=9E4kDUxC",
      homesIndexPage
    );
    // The index scrape may come back without a list when it failed; treat
    // that as "no listings" instead of crashing on the loop.
    const listingUrls = Array.isArray(homes) ? homes : [];
    const descriptionPage = await browser.newPage();
    // Listings are visited one at a time because they share a single page.
    for (const listingUrl of listingUrls) {
      await scrapeDescriptionPage(listingUrl, descriptionPage);
    }
    console.log(homes);
  } finally {
    // Without this the browser process stays open and Node never exits.
    await browser.close();
  }
}
// Entry point: report any failure that escapes main() and signal it through
// the exit code instead of leaving an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment