Skip to content

Instantly share code, notes, and snippets.

@Hyllesen
Created January 25, 2020 22:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hyllesen/e5932754783f38f6b782490072959009 to your computer and use it in GitHub Desktop.
Save Hyllesen/e5932754783f38f6b782490072959009 to your computer and use it in GitHub Desktop.
const cheerio = require("cheerio");
const mongoose = require("mongoose");
const puppeteer = require("puppeteer");
const prompt = require("prompt-sync")({ sigint: true });
const fs = require("fs");
// Start URL that goToAirbnb() opens with page.goto(): Airbnb's search-results
// page for homes in Ohio.
const originUrl = "https://www.airbnb.com/s/Ohio/homes";
/**
 * Navigates the given Puppeteer page to `originUrl`.
 *
 * @param {import("puppeteer").Page} page - Page to navigate.
 * @returns {Promise<import("puppeteer").Page>} The same page object, once
 *   `page.goto` has resolved.
 */
async function goToAirbnb(page) {
  // `page.goto` resolves only after the navigation it started has completed,
  // so no additional `page.waitForNavigation()` is needed here. The previous
  // un-awaited call had no further navigation to observe, so it could only
  // reject on timeout, and nothing was handling that rejection.
  await page.goto(originUrl);
  return page;
}
/**
 * Runs a search through the page's search bar: types the query, picks a
 * suggestion from the dropdown, then submits and waits for the navigation.
 *
 * Any error raised along the way is logged with `console.log` and the
 * function then resolves to `undefined`.
 *
 * @param {import("puppeteer").Page} page - Page showing the search bar.
 * @param {string} queryText - Text typed into the search input.
 * @returns {Promise<import("puppeteer").Page|undefined>} The page after the
 *   search navigation, or `undefined` when a step failed.
 */
async function searchAirbnbStays(page, queryText) {
  const searchInputSelector = "input#Koan-magic-carpet-koan-search-bar__input";
  const suggestionSelector = "li#Koan-magic-carpet-koan-search-bar__option-1";
  const submitButtonSelector = "button._1vs0x720";
  const keystrokeDelayMs = 75;
  const submitDelayMs = 100;

  try {
    await page.type(searchInputSelector, queryText, { delay: keystrokeDelayMs });
    await page.click(suggestionSelector, { delay: keystrokeDelayMs });

    // Start listening for the navigation before clicking submit, so the
    // navigation triggered by the click cannot be missed.
    const navigationDone = page.waitForNavigation();
    const submitClicked = page.click(submitButtonSelector, {
      delay: submitDelayMs,
    });
    await Promise.all([navigationDone, submitClicked]);

    return page;
  } catch (failure) {
    console.log(failure);
  }
}
/**
 * Extracts the listing URLs from the page that is currently loaded.
 *
 * The body HTML is read from the browser and parsed with cheerio; every
 * element carrying `itemprop="url"` contributes its `content` attribute,
 * prefixed with `https://`.
 *
 * Any error is logged with `console.log` and the function then resolves to
 * `undefined`.
 *
 * @param {import("puppeteer").Page} page - Page showing the search results.
 * @returns {Promise<string[]|undefined>} Listing URLs, or `undefined` when
 *   reading or parsing the page failed.
 */
async function parseListings(page) {
  try {
    const html = await page.evaluate(() => document.body.innerHTML);
    const $ = cheerio.load(html);

    // The scraped `content` values start with the literal text "undefined"
    // where the host name should be, so put the real host (taken from
    // originUrl) in its place.
    const airbnbHost = new URL(originUrl).host;

    const listingLinks = $("[itemprop='url']")
      .map((i, element) => $(element).attr("content"))
      .get()
      // Elements without a usable `content` attribute would otherwise end up
      // as the bogus link "https://undefined".
      .filter((content) => typeof content === "string" && content !== "")
      .map(
        (content) =>
          `https://${content.replace(/^undefined(?=\/|$)/, airbnbHost)}`
      );

    console.log(listingLinks);
    return listingLinks;
  } catch (error) {
    console.log(error);
    return undefined;
  }
}
/**
 * Appends `data` to `./test.json`, serialized as JSON, one document per line.
 *
 * Each call writes a newline-terminated record, so the file stays parseable
 * line by line after any number of calls. Without the separator, consecutive
 * documents ran together and the file could no longer be parsed after the
 * second call.
 *
 * Errors (unserializable data, file-system failures) are logged with
 * `console.log` and not rethrown.
 *
 * @param {*} data - Any JSON-serializable value.
 * @returns {Promise<void>}
 */
async function storeData(data) {
  try {
    const serialized = JSON.stringify(data);
    // JSON.stringify returns undefined for values such as `undefined` or a
    // function; refuse to write the text "undefined" into the file.
    if (serialized === undefined) {
      throw new TypeError("storeData: data cannot be serialized to JSON");
    }
    fs.appendFileSync("./test.json", `${serialized}\n`);
  } catch (err) {
    console.log(err);
  }
}
/**
 * Script entry point: launches a visible (non-headless) browser, opens the
 * Airbnb results page and extracts the listing links from it.
 *
 * The browser is always closed before the function settles, including when a
 * step throws, so the script does not leave a browser process behind.
 *
 * @returns {Promise<string[]|undefined>} Whatever `parseListings` resolved to.
 */
async function main() {
  // NOTE: the MongoDB connection step (connectToMongoDB) is currently disabled.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await goToAirbnb(await browser.newPage());
    // NOTE: the search-bar step, searchAirbnbStays(page, "ohio"), is currently
    // disabled; the listings are parsed straight from the page opened above.
    return await parseListings(page);
  } finally {
    await browser.close();
  }
}

// Handle a rejected run explicitly instead of leaving the promise floating.
main().catch((err) => {
  console.log(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment