Skip to content

Instantly share code, notes, and snippets.

@adrianhorning08
Last active November 3, 2024 07:25
Show Gist options
  • Save adrianhorning08/6305b872e172c4cc74edcac57370c1f5 to your computer and use it in GitHub Desktop.
Save adrianhorning08/6305b872e172c4cc74edcac57370c1f5 to your computer and use it in GitHub Desktop.
Zillow Scraper
/**
 * Scrolls the `#search-page-list-container` element downwards in fixed steps
 * until the accumulated scroll distance covers the container's scroll height
 * (presumably so that lazily rendered listing cards get a chance to load).
 *
 * On every tick the container is scrolled by `distance` pixels. Once the
 * accumulated distance reaches the height measured before the scroll, the
 * height is read again: if the container grew, the counter restarts and
 * scrolling continues; otherwise the timer is cleared and the promise resolves.
 *
 * @param {object} [options]
 * @param {number} [options.distance=600] Pixels to scroll on each tick.
 * @param {number} [options.intervalMs=400] Delay between ticks in milliseconds.
 * @returns {Promise<void>} Resolves once scrolling has stopped.
 * @throws {Error} If `#search-page-list-container` is not in the document.
 */
async function scrollDown({ distance = 600, intervalMs = 400 } = {}) {
  const wrapper = document.querySelector("#search-page-list-container");
  if (!wrapper) {
    // Without this guard every timer tick would throw on `wrapper.scrollHeight`
    // and the promise below would never settle.
    throw new Error(
      'scrollDown: element "#search-page-list-container" was not found on this page'
    );
  }

  await new Promise((resolve) => {
    let totalHeight = 0;
    const timer = setInterval(() => {
      const scrollHeightBefore = wrapper.scrollHeight;
      wrapper.scrollBy(0, distance);
      totalHeight += distance;

      if (totalHeight < scrollHeightBefore) {
        // Still above the bottom of the container: keep ticking.
        return;
      }

      totalHeight = 0;
      // The height is re-read right after the scroll, with no extra wait.
      if (wrapper.scrollHeight > scrollHeightBefore) {
        // The container grew, so there is more content to scroll through.
        return;
      }

      clearInterval(timer);
      resolve();
    }, intervalMs);
  });
}
/**
 * Collects one record per listing card found under
 * `#search-page-list-container ul li`.
 *
 * An `<li>` is treated as a listing when one of its class names contains
 * "ListItem" and it holds a `<script>` element whose text parses as a JSON
 * object. Cards without a script, or whose script is not valid JSON, are
 * skipped so that one bad card cannot abort the whole scrape.
 *
 * Each record contains the properties of the JSON's `address`, `floorSize`
 * and `geo` objects (spread in that order, so later objects overwrite
 * same-named keys), followed by `priceString`, `price`, `beds`, `baths`,
 * `sqft`, `name` and `url`. Numeric fields that cannot be determined are set
 * to the empty string.
 *
 * @returns {Array<object>} The records for the cards currently in the DOM.
 */
function getListings() {
  // Keeps only digits, "." and "-" and converts the rest to a number.
  // Returns "" when the input is missing or holds no usable number.
  const parseNumber = (raw) => {
    if (raw === null || raw === undefined) {
      return "";
    }
    const cleaned = String(raw).replace(/[^0-9.-]+/g, "");
    if (cleaned === "") {
      return "";
    }
    const parsed = Number(cleaned);
    return Number.isNaN(parsed) ? "" : parsed;
  };

  // Returns the first integer or decimal number in the text of the element
  // matched by `selector` inside `root`, or "" when there is none.
  // The decimal part matters for values such as "2.5 ba".
  const firstNumberIn = (root, selector) => {
    const match = root
      .querySelector(selector)
      ?.textContent?.match(/\d+(?:\.\d+)?/);
    return match ? Number(match[0]) : "";
  };

  const listings = [];
  const items = document.querySelectorAll("#search-page-list-container ul li");

  for (const listing of items) {
    // Only <li> elements with a class containing "ListItem" are listing cards.
    const isListing = Array.from(listing.classList).some((className) =>
      className.includes("ListItem")
    );
    if (!isListing) {
      continue;
    }

    const script = listing.querySelector("script");
    if (!script) {
      continue;
    }

    let json;
    try {
      json = JSON.parse(script.textContent);
    } catch (err) {
      console.warn("getListings: skipping a card whose script is not valid JSON", err);
      continue;
    }
    if (json === null || typeof json !== "object") {
      continue;
    }

    const beds = firstNumberIn(
      listing,
      'ul[class*="StyledPropertyCardHomeDetailsList"] li:nth-child(1)'
    );
    const baths = firstNumberIn(
      listing,
      'ul[class*="StyledPropertyCardHomeDetailsList"] li:nth-child(2)'
    );

    // The price element may be absent; fall back to "" instead of throwing.
    const priceString =
      listing.querySelector('span[data-test="property-card-price"]')
        ?.textContent ?? "";

    listings.push({
      ...json.address,
      ...json.floorSize,
      ...json.geo,
      priceString,
      price: parseNumber(priceString),
      beds,
      baths,
      // `floorSize.value` may be a string such as "1,234" or already a number.
      sqft: parseNumber(json.floorSize?.value),
      name: json.name,
      url: json.url,
    });
  }

  return listings;
}
/**
 * Serializes an array of flat objects to CSV and triggers a browser download.
 *
 * The header row is the union of the own keys of all rows, in first-seen
 * order, so rows with differing key sets still line up under the right column.
 * Cells are escaped following RFC 4180: a value containing a comma, a double
 * quote or a line break is wrapped in double quotes and embedded quotes are
 * doubled. `null`/`undefined` become empty cells and object values are
 * JSON-encoded.
 *
 * @param {Array<object>} jsonData Rows to export.
 * @param {string} fileName Name suggested for the downloaded file.
 * @returns {void} Nothing; when `jsonData` is empty no download is triggered.
 */
function createCSV(jsonData, fileName) {
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    console.warn("createCSV: no rows to export; skipping download.");
    return;
  }

  // Converts one value to a safely quoted CSV cell.
  const escapeCell = (value) => {
    if (value === null || value === undefined) {
      return "";
    }
    const text =
      typeof value === "object" ? JSON.stringify(value) : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  // A Set keeps insertion order, which gives a stable first-seen column order.
  const headers = [
    ...new Set(jsonData.flatMap((item) => Object.keys(item))),
  ];

  const csvLines = [headers.map(escapeCell).join(",")];
  for (const item of jsonData) {
    const cells = headers.map((key) =>
      // Own-property check so inherited members never leak into a cell.
      Object.prototype.hasOwnProperty.call(item, key)
        ? escapeCell(item[key])
        : ""
    );
    csvLines.push(cells.join(","));
  }

  // Create a Blob containing the CSV data and a temporary URL pointing at it.
  const csvBlob = new Blob([csvLines.join("\n")], {
    type: "text/csv;charset=utf-8",
  });
  const csvUrl = URL.createObjectURL(csvBlob);

  // A temporary <a download> element is the standard way to start a download
  // from script; it is attached to the body only for the duration of the click.
  const link = document.createElement("a");
  link.href = csvUrl;
  link.target = "_blank";
  link.download = fileName;
  document.body.appendChild(link);
  link.click();

  // Remove the link and release the Blob URL.
  document.body.removeChild(link);
  URL.revokeObjectURL(csvUrl);
}
/**
 * Scrapes every page of the current search results and downloads them as CSV.
 *
 * For each page it calls `scrollDown()` and then `getListings()`, and moves on
 * by clicking the `a[title="Next page"]` link. Paging stops when that link is
 * missing, when its `aria-disabled` attribute is "true", or when `maxPages`
 * pages have been scraped. The combined rows are passed to `createCSV()`
 * under a timestamped file name; if nothing was collected, no file is created.
 *
 * NOTE(review): after clicking "Next page" nothing waits explicitly for the
 * new results beyond the delay built into `scrollDown()` — confirm this is
 * long enough on slow connections.
 *
 * @param {object} [options]
 * @param {number} [options.maxPages=Infinity] Upper bound on pages to scrape.
 * @returns {Promise<void>} Resolves after the CSV download has been triggered.
 */
async function scrapeZillow({ maxPages = Infinity } = {}) {
  // Clicks the "Next page" link when it exists and is enabled.
  // Returns whether a navigation was triggered.
  const goToNextPage = () => {
    const nextButton = document.querySelector('a[title="Next page"]');
    // The link can be missing entirely (for example on the last page), so it
    // must be checked before its attributes are read.
    if (!nextButton || nextButton.getAttribute("aria-disabled") === "true") {
      return false;
    }
    nextButton.click();
    return true;
  };

  const allListings = [];

  // First page.
  await scrollDown();
  const firstPageListings = getListings();
  console.log("listings", firstPageListings);
  allListings.push(...firstPageListings);

  // Remaining pages. `page` counts the pages scraped so far.
  let page = 1;
  while (page < maxPages && goToNextPage()) {
    console.log("page", page);
    await scrollDown();
    const listings = getListings();
    console.log(`You scraped ${listings.length} listings!`);
    console.log(
      `Check out my course for all my web scraping secrets: https://adrianhorning.gumroad.com/l/the-ultimate-web-scraping-course`
    );
    allListings.push(...listings);
    page++;
  }

  if (allListings.length === 0) {
    // An empty export has no header row to derive, so skip it.
    console.warn("scrapeZillow: no listings were found; skipping CSV export.");
    return;
  }

  console.log(`Congrats! 🎉 You scraped ${allListings.length} listings!`);
  console.log('Check out my web scraping course: https://adrianhorning.gumroad.com/l/the-ultimate-web-scraping-course')
  createCSV(allListings, `zillowListings-${new Date().getTime()}.csv`);
}
// Entry point: start the scrape and wait for it to finish.
// Top-level `await` is only valid where the host allows it (an ES module, or
// presumably the browser DevTools console this snippet is pasted into — confirm).
await scrapeZillow();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment