Created
August 25, 2019 19:24
-
-
Save Hyllesen/d831f6fd4fc28cb5362e6f178457db77 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require("puppeteer"); | |
const cheerio = require("cheerio"); | |
// Example of the listing data this scraper is meant to produce for one home.
// Nothing in the visible code reads it; it documents the target shape only.
const sample = {
  guests: 1,
  bedrooms: 1,
  beds: 1,
  baths: 1,
  pesosPerNight: 350,
};

// Shared puppeteer browser handle; assigned by main() once the browser is launched.
let browser;
/**
 * Opens a search-results page and collects the room links found on it.
 *
 * Every element matching `[itemprop='url']` is inspected; the part of its
 * `content` attribute that follows "rooms" is appended to
 * "https://airbnb.com/rooms" to build the link. Elements without a usable
 * `content` attribute are skipped.
 *
 * @param {string} url - Address of the search-results page to open.
 * @param {object} page - Puppeteer page used for navigation and evaluation.
 * @returns {Promise<string[]>} Room URLs; an empty array when scraping fails.
 */
async function scrapeHomesIndexPage(url, page) {
  try {
    await page.goto(url, { waitUntil: "networkidle2" });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so there is nothing to await here.
    const $ = cheerio.load(html);
    const homes = $("[itemprop='url']")
      .map((i, element) => {
        const content = $(element).attr("content");
        // The host part of the content URL comes back as "undefined" or "null"
        // for some reason, so only the part after "rooms" is used.
        const roomPath =
          typeof content === "string" ? content.split("rooms")[1] : undefined;
        // Returning null makes cheerio's map() drop the element instead of
        // emitting a bogus "https://airbnb.com/roomsundefined" link.
        return roomPath === undefined
          ? null
          : `https://airbnb.com/rooms${roomPath}`;
      })
      .get();
    console.log(homes);
    return homes;
  } catch (err) {
    console.error("Error scraping homes page");
    console.error(err);
    // Keep the return type stable so callers can iterate without a guard.
    return [];
  }
}
/**
 * Opens a single home's page, reads the price-per-night text and logs it.
 *
 * The price is located with a long positional selector under `#room`.
 * NOTE(review): that selector depends on the page's exact nesting depth and
 * will likely need updating whenever the markup changes.
 *
 * @param {string} url - Address of the home's description page.
 * @param {object} page - Puppeteer page used for navigation and evaluation.
 * @returns {Promise<string|null>} The text found by the price selector (empty
 *   when nothing matches), or null when scraping fails.
 */
async function scrapeDescriptionPage(url, page) {
  try {
    await page.goto(url, { waitUntil: "networkidle2" });
    const html = await page.evaluate(() => document.body.innerHTML);
    // cheerio.load is synchronous, so there is nothing to await here.
    const $ = cheerio.load(html);
    const pricePerNight = $(
      "#room > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > div > span > span"
    ).text();
    console.log("price pr. night");
    console.log(pricePerNight);
    // Hand the value back so callers can use it instead of only seeing the log.
    return pricePerNight;
  } catch (err) {
    console.error("error scraping description page");
    console.error(err);
    return null;
  }
}
/**
 * Launches a visible browser, scrapes the home links from one search-results
 * page, then visits each home's page in turn to scrape it.
 *
 * The browser is stored in the module-level `browser` variable and is always
 * closed before this function settles, whether scraping succeeded or not.
 *
 * @returns {Promise<void>} Settles once every home has been visited and the
 *   browser has been closed; rejects if launching or scraping throws.
 */
async function main() {
  browser = await puppeteer.launch({ headless: false });
  try {
    const homesIndexPage = await browser.newPage();
    // It's important to have a date selected to get prices in Airbnb
    const homes = await scrapeHomesIndexPage(
      "https://www.airbnb.com/s/Wellington--New-Zealand/homes?refinement_paths%5B%5D=%2Fhomes&current_tab_id=home_tab&selected_tab_id=home_tab&place_id=ChIJy3TpSfyxOG0RcLQTomPvAAo&search_type=filter_change&screen_size=large&checkin=2019-09-02&checkout=2019-09-06&s_tag=9E4kDUxC",
      homesIndexPage
    );
    // The index scrape may come back without a list when it fails; treat that
    // as "no homes" instead of crashing on a missing length.
    const homeUrls = Array.isArray(homes) ? homes : [];
    const descriptionPage = await browser.newPage();
    // Visit the homes one at a time because they all share a single page.
    for (const homeUrl of homeUrls) {
      await scrapeDescriptionPage(homeUrl, descriptionPage);
    }
    console.log(homeUrls);
  } finally {
    // Always release the browser so the process can exit.
    await browser.close();
  }
}
// Start the scraper; report any failure and signal it through the exit code
// rather than leaving the promise rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment