Created
January 25, 2020 22:12
-
-
Save Hyllesen/e5932754783f38f6b782490072959009 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const cheerio = require("cheerio"); | |
const mongoose = require("mongoose"); | |
const puppeteer = require("puppeteer"); | |
const prompt = require("prompt-sync")({ sigint: true }); | |
const fs = require("fs"); | |
const originUrl = "https://www.airbnb.com/s/Ohio/homes"; | |
/**
 * Navigates the given Puppeteer page to the Airbnb search results URL.
 *
 * The previous version called `page.waitForNavigation()` after `goto` had
 * already resolved and never awaited it. That promise waits for a *second*
 * navigation which never happens, so it eventually rejects with a timeout
 * that nothing handles. The wait condition is now expressed on `goto` itself.
 *
 * @param {object} page - Puppeteer page to navigate.
 * @returns {Promise<object>} The same page, once navigation has settled.
 */
async function goToAirbnb(page) {
  // "networkidle2" resolves once the network has been nearly idle for a short
  // period, giving client-side rendering a chance to finish before parsing.
  await page.goto(originUrl, { waitUntil: "networkidle2" });
  return page;
}
/**
 * Types a query into the Airbnb search bar, picks the first suggestion and
 * submits the search, waiting for the resulting navigation.
 *
 * Any failure is logged with `console.log` and the function then resolves to
 * `undefined` instead of the page.
 *
 * @param {object} page - Puppeteer page showing the Airbnb search bar.
 * @param {string} queryText - Text to type into the search input.
 * @returns {Promise<object|undefined>} The page on success, otherwise `undefined`.
 */
async function searchAirbnbStays(page, queryText) {
  const searchInput = "input#Koan-magic-carpet-koan-search-bar__input";
  const firstSuggestion = "li#Koan-magic-carpet-koan-search-bar__option-1";
  const submitButton = "button._1vs0x720";

  try {
    await page.type(searchInput, queryText, { delay: 75 });
    await page.click(firstSuggestion, { delay: 75 });

    // Arm the navigation wait before clicking so the navigation cannot be missed.
    const navigation = page.waitForNavigation();
    const submission = page.click(submitButton, { delay: 100 });
    await Promise.all([navigation, submission]);

    return page;
  } catch (failure) {
    console.log(failure);
  }
}
/**
 * Collects listing URLs from the page's `[itemprop='url']` elements.
 *
 * The page body is snapshotted once and parsed with cheerio, so no further
 * waiting on the live page happens after the snapshot is taken.
 *
 * Fixes over the previous version:
 * - elements without a `content` attribute are skipped instead of producing
 *   the bogus link "https://undefined";
 * - a `content` value that starts with the literal text "undefined" (the
 *   host placeholder observed in the scraped markup) has that prefix replaced
 *   with "www.airbnb.com".
 *
 * Errors are logged with `console.log`; in that case the function resolves
 * to `undefined`.
 *
 * @param {object} page - Puppeteer page showing search results.
 * @returns {Promise<string[]|undefined>} Absolute listing URLs.
 */
async function parseListings(page) {
  try {
    const html = await page.evaluate(() => document.body.innerHTML);
    const $ = cheerio.load(html);
    const contents = $("[itemprop='url']")
      .map((_index, element) => $(element).attr("content"))
      .get();
    const listing_links = contents
      .filter((content) => typeof content === "string" && content.length > 0)
      .map(
        (content) => `https://${content.replace(/^undefined/, "www.airbnb.com")}`
      );
    console.log(listing_links);
    return listing_links;
  } catch (error) {
    console.log(error);
  }
}
/**
 * Appends `data` to ./test.json as one JSON document per line.
 *
 * The previous version appended the serialized value with no separator, so a
 * second call produced text such as `{"a":1}{"a":2}` that no JSON parser can
 * read back. Terminating each record with a newline keeps every call's output
 * individually parseable.
 *
 * Failures are logged with `console.log` and not rethrown.
 *
 * @param {*} data - Any JSON-serializable value.
 * @returns {Promise<void>}
 */
async function storeData(data) {
  try {
    const serialized = JSON.stringify(data);
    // JSON.stringify yields undefined for undefined/functions/symbols; refuse
    // to write anything rather than append the text "undefined".
    if (serialized === undefined) {
      throw new TypeError("storeData: data is not JSON-serializable");
    }
    fs.appendFileSync("./test.json", `${serialized}\n`);
  } catch (err) {
    console.log(err);
  }
}
/**
 * Entry point: launches a visible (non-headless) browser, opens the Airbnb
 * results page and extracts the listing links.
 *
 * The browser is now always closed, even when a step throws; previously it
 * was left running, which leaked the Chromium process and kept Node alive.
 *
 * @returns {Promise<string[]|undefined>} Whatever `parseListings` resolves to.
 */
async function main() {
  // Disabled step kept for reference: await connectToMongoDB();
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await goToAirbnb(await browser.newPage());
    // Disabled step kept for reference: page = await searchAirbnbStays(page, "ohio");
    return await parseListings(page);
  } finally {
    await browser.close();
  }
}
// Run the scraper; surface any failure instead of leaving the promise
// floating, and mark the process as failed for calling scripts.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment