Skip to content

Instantly share code, notes, and snippets.

@andrefa
Created August 12, 2020 13:20
Show Gist options
  • Save andrefa/a2a1650e84331735726858af176bfc86 to your computer and use it in GitHub Desktop.
Save andrefa/a2a1650e84331735726858af176bfc86 to your computer and use it in GitHub Desktop.
const got = require('got');
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
// Base craigslist (Vancouver, apartments) search URL that every page request is built from.
// The search filters live in the query string (search_distance, postal, min_bedrooms,
// minSqft, availabilityMode, pets_cat, laundry, sale_date); edit them here to change the search.
const cgUrl = 'https://vancouver.craigslist.org/search/van/apa?search_distance=4&postal=V6E1W5&min_bedrooms=2&minSqft=600&availabilityMode=0&pets_cat=1&laundry=1&sale_date=all+dates';
// Amount the result offset advances between page requests; fetchApts also treats a page
// with exactly this many rows as a signal to request another page.
// NOTE(review): presumably matches the number of results craigslist serves per page - confirm.
const pageSize = 120;
/**
 * Look up the first descendant of `element` matching a CSS selector.
 *
 * @param {{ querySelector: Function }} element - node (or document) to search within.
 * @param {string} expression - CSS selector passed to `querySelector`.
 * @returns {object} the match, or a fresh empty object when the lookup yields a falsy
 *   value, so callers can read properties off the result without a null check.
 */
const find = (element, expression) => {
  const match = element.querySelector(expression);
  if (match) {
    return match;
  }
  return {};
};
/**
 * Collect every descendant of `element` matching a CSS selector into a real array.
 *
 * @param {{ querySelectorAll: Function }} element - node (or document) to search within.
 * @param {string} expression - CSS selector passed to `querySelectorAll`.
 * @returns {Array} the matches copied into a plain array, so array methods such as
 *   `map` can be used on the result.
 */
const list = (element, expression) => {
  const matches = element.querySelectorAll(expression);
  return [...matches];
};
/**
 * Normalise a scraped text fragment.
 *
 * Removes newlines and the characters `-`, `(`, `)`, `,` and `$`, collapses each run of
 * spaces into a single space, then trims both ends.
 *
 * @param {string} [text=''] - raw text; `undefined` is treated as the empty string.
 * @returns {string} the cleaned text.
 */
const cleanText = (text = '') => {
  const withoutNoise = text.replace(/\n|[-(),$]/g, '');
  const singleSpaced = withoutNoise.replace(/[ ]+/g, ' ');
  return singleSpaced.trim();
};
/**
 * Fetch every listing for the configured search by walking the result pages in order.
 *
 * Pages are requested one at a time through `process`, with the offset starting at 0 and
 * growing by `pageSize` per request. The walk stops after the first page whose length is
 * not exactly `pageSize`; that page's rows are still included.
 *
 * @returns {Promise<Array<object>>} all rows from all fetched pages, in fetch order.
 */
const fetchApts = async () => {
  const allListings = [];
  for (let offset = 0; ; offset += pageSize) {
    console.log('Fetching for offset ' + offset);
    const page = await process(offset);
    allListings.push(...page);
    // Anything other than a completely full page means there is nothing further to request.
    if (page.length !== pageSize) {
      return allListings;
    }
  }
};
/**
 * Download one page of search results and convert each result row into a plain object.
 *
 * The page URL is `cgUrl` with `&s=<offset>` appended. The response body is parsed with
 * JSDOM and every `.result-row` element is mapped to an object.
 *
 * NOTE(review): declared at module scope, this binding shadows Node's global `process`
 * for the whole file; renaming it would also require updating the call in fetchApts.
 *
 * @param {number} offset - value sent as the `s` query parameter of the search URL.
 * @returns {Promise<Array<{hood: string, housing: string, link: (string|undefined),
 *   price: string, posted: (string|undefined), title: (string|undefined),
 *   img: (string|undefined)}>>} one entry per result row. `price`, `hood` and `housing`
 *   are passed through `cleanText` (so they are '' when the element is missing); the
 *   other fields are raw and `undefined` when the element is missing.
 */
const process = async (offset) => {
  const { body } = await got(cgUrl + '&s=' + offset);
  const { document } = new JSDOM(body).window;

  // `find` yields `{}` on a miss, so reading `.textContent` here gives undefined rather than throwing.
  const textOf = (row, selector) => find(row, selector).textContent;

  return list(document, '.result-row').map((row) => {
    // The title anchor supplies both the listing title and its link.
    const titleAnchor = find(row, '.result-title');

    return {
      hood: cleanText(textOf(row, '.result-hood')),
      housing: cleanText(textOf(row, '.housing')),
      link: titleAnchor.href,
      price: cleanText(textOf(row, '.result-price')),
      posted: textOf(row, '.result-date'),
      title: titleAnchor.textContent,
      img: find(row, 'img').src,
    };
  });
};
// Public surface of this module: fetchApts is the only export.
module.exports = {
fetchApts
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment