-
-
Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
/**
 * NOTE: this specifically works if the house is for sale since it renders differently.
 * This will download the highest resolution available per image.
 */
/**
 * STEP 1: Make sure to *SCROLL* through all images so they appear on DOM.
 * No need to click any images.
 */
/**
 * STEP 2: Open Dev Tools Console.
 * Copy and paste code below
 */
// Inject jQuery from Google's CDN; once it has loaded, collect the gallery
// image URLs from the DOM and trigger one browser download per image.
const script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Keep jQuery in a local binding rather than creating/overwriting a global `$`.
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay

  // srcset is "<url> <size>, <url> <size>": after splitting on spaces the
  // second-to-last token is the URL of the last (highest res) candidate.
  const imageList = $('ul.media-stream li picture source[type="image/jpeg"]').map(function () {
    const srcset = $(this).attr('srcset');
    if (!srcset) {
      return null; // jQuery's .map() leaves out null/undefined results
    }
    const parts = srcset.split(' ');
    return parts[parts.length - 2];
  }).toArray();

  try {
    // get all image blobs in parallel first before downloading for proper batching
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(responses.map((res) => res.blob()));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log('1 sec delay...');
        await delay(1000);
      }
      console.log(i);
      const a = document.createElement('a');
      a.style.display = 'none';
      const url = window.URL.createObjectURL(blobs[i]);
      a.href = url;
      a.download = i + ''; // file is named after its index, without an extension
      document.body.appendChild(a);
      a.click();
      a.remove(); // do not leave one hidden anchor per image in the page
      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // Surface fetch/blob failures instead of leaving an unhandled rejection.
    console.error('Image download failed:', err);
  }
};
document.getElementsByTagName('head')[0].appendChild(script);
EDIT = Only worked for a closed listing. I too am now unable to use the script for an open listing as of 12/28/23.
TY! Still working as of today, using timbitzen comments. I'm using Chromium Version 120.0.6099.109 (Official Build) for Linux Mint (64-bit).
Downloaded 58 webp images from a listing, no problem. TY again, to all contributors!
Also, props to raghucbz November 18 2022 wisdom in this GitHub thread, for how to batch convert webp to jpg. That website works great too. TY!
@T150 Can you share a listing it doesn't work on? I just tested it on an open listing and no problems downloading the images.
This worked for me, but only after clicking to load the first image in the lightbox, and then advancing through each photo individually using the arrow key. Once I was on the last image, I then ran the script through the console, and was successful. Using: Mac Chrome Version 120.0.6099.199 (Official Build) (arm64)
// Inject jQuery, then step through the photo carousel collecting each slide's
// image URL, and finally download every collected image.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Local binding: avoids creating/overwriting a global `$`.
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay
  // can't map since there isn't a list, so just push as we find more.
  const imageList = [];
  // while there is a next button
  while (
    $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
    $(".photo-carousel-icon-wrapper .icon-reload").length
  ) {
    // Wait a little to make sure the next image source is loaded. If images are missed, increasing the timeout might help
    await delay(200);
    // Reload icon is showing: end of the carousel, break out of loop
    if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
      break;
    }
    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    // The visible slide may not expose a JPEG srcset (yet); skip it instead of throwing.
    if (srcset) {
      const srcs = srcset.split(" ");
      // second-to-last token is the URL of the last srcset candidate
      const src = srcs[srcs.length - 2];
      // just in case... make sure the src is not already in the list.
      if (src && !imageList.includes(src)) {
        imageList.push(src);
      }
    }
    // go to the next slide
    $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
  }
  try {
    // get all image blobs in parallel first before downloading for proper batching
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(responses.map((res) => res.blob()));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      console.log(i);
      const a = document.createElement("a");
      a.style.display = "none";
      const url = window.URL.createObjectURL(blobs[i]);
      a.href = url;
      a.download = i + ""; // file is named after its index, without an extension
      document.body.appendChild(a);
      a.click();
      a.remove(); // do not leave one hidden anchor per image in the page
      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // Surface fetch/blob failures instead of leaving an unhandled rejection.
    console.error("Image download failed:", err);
  }
};
document.getElementsByTagName("head")[0].appendChild(script);
@frederickjansen @knowyourrivals Okay, working as advertised. Perhaps I was not drilling down enough into the Lightbox (pictures only) feature of the Zillow webpages (so I followed the process @knowyourrivals described). And all looks good now. Thanks for responding and motivating me to keep trying. Cheers!
Here is the new script to download the highest-resolution images. The issue with opening the list view is that it shows a highest resolution of 380 x 280. In the code below I replaced the image URL suffix with uncropped_scaled_within_1344_1008.jpg, which is the highest resolution available.
// Fragment of the imageList callback shown below: `srcset` is the space-split
// srcset attribute, so srcset[0] is its first URL. The `-cc_ft_192.jpg` suffix
// is swapped for the `-uncropped_scaled_within_1344_1008.jpg` variant.
const highRes = srcset[0].replace("-cc_ft_192.jpg","-uncropped_scaled_within_1344_1008.jpg")
use the same script and just replace imageList with this below code
// Replacement for the `imageList` declaration of the gallery script: takes the
// first URL of each JPEG <source>'s srcset and rewrites its `-cc_ft_192.jpg`
// suffix to the `-uncropped_scaled_within_1344_1008.jpg` variant.
const imageList = $('ul.media-stream li picture source[type="image/jpeg"]').map(function () {
  const srcset = $(this).attr('srcset');
  if (!srcset) {
    return null; // no srcset on this <source>; jQuery's .map() leaves out null results
  }
  const [firstUrl] = srcset.split(' ');
  // URLs without the `-cc_ft_192.jpg` suffix pass through unchanged.
  return firstUrl.replace("-cc_ft_192.jpg", "-uncropped_scaled_within_1344_1008.jpg");
}).toArray();
April 2024 was able to get this to work! I had to individually go through each image, but if I stopped and did it on the last image I only got the final photo downloaded. When going back to the first image, the code went through all the images for me and began download. I had to agree on chrome to allow "multiple downloads." Thank you SO much for posting about this. Saved me so much time.
This does not work for me. I do not see any pop-up windows. Also, if the pictures are being downloaded, did the code mention where the pictures will be stored at?
@medosf Which above script is this for?
Hi all,
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,
Had to edit a lot of it, but this version that I made now works perfectly in one try:
// Inject jQuery, walk the photo carousel collecting each slide's image URL,
// then download every collected image as a separate file.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
  const imageList = [];
  try {
    // Keep advancing while a "next" arrow (or the end-of-carousel reload icon) exists.
    while (
      $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
      $(".photo-carousel-icon-wrapper .icon-reload").length
    ) {
      // Give the next slide's sources a moment to render.
      await delay(200);
      // Reload icon showing: the carousel has reached its end.
      if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
        break;
      }
      const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
      if (srcset) {
        const srcs = srcset.split(" ");
        // second-to-last token is the URL of the last srcset candidate
        const src = srcs[srcs.length - 2];
        // Skip duplicates, and skip srcsets too short to contain a URL at that position.
        if (src && !imageList.includes(src)) {
          imageList.push(src);
        }
      }
      $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
    }
    // Fetch every image in parallel before starting the batched downloads.
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(responses.map((res) => res.blob()));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      const a = document.createElement("a");
      a.style.display = "none";
      const url = window.URL.createObjectURL(blobs[i]);
      a.href = url;
      a.download = `${i}`; // file is named after its index, without an extension
      document.body.appendChild(a);
      a.click();
      a.remove(); // do not leave one hidden anchor per image in the page
      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // An async onload handler's rejection would otherwise go unhandled.
    console.error("Image download failed:", err);
  }
};
document.head.appendChild(script);
Just open in DevConsole, run it - and you'll see the following:
For educational use only.
So I tested your code @Montana and it didn't work for me.
Possibly due to the fact that Zillow changed the URL to a more dynamic one.
So I used their sister site Trulia.com and wrote the code below.
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,
Please run inside chrome devtool console..
/**
 * Collect the `src` of every `picture > img` inside the
 * `div[data-testid="grid-gallery"]` container and log the list.
 *
 * Without `optionalUrl` the current page's `document` is scanned after waiting
 * `delay` ms (time for the gallery modal to render). With `optionalUrl` the
 * page is fetched and parsed with DOMParser; no wait is applied there because
 * a parsed document is static and never runs scripts, so waiting cannot make
 * more images appear.
 *
 * @param {?string} [optionalUrl=null] - Listing URL to fetch instead of scanning the current page.
 * @param {number} [delay=5000] - Milliseconds to wait before scanning the current page.
 * @returns {Promise<string[]>} The image URLs found; an empty array when the
 *   gallery container is missing or an error occurred (errors are logged, not thrown).
 */
async function extractImageUrls(optionalUrl = null, delay = 5000) {
  // Pull non-empty image sources out of the gallery container of `doc`.
  function extractFromPage(doc) {
    const container = doc.querySelector('div[data-testid="grid-gallery"]');
    if (!container) {
      return [];
    }
    return Array.from(container.querySelectorAll('picture > img'))
      .map((img) => img.src)
      .filter(Boolean);
  }

  function delayExecution(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  if (optionalUrl) {
    try {
      const response = await fetch(optionalUrl);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const text = await response.text();
      const doc = new DOMParser().parseFromString(text, 'text/html');
      const imageUrls = extractFromPage(doc);
      console.log(imageUrls);
      return imageUrls;
    } catch (error) {
      console.error('Error fetching the specified URL:', error);
      return [];
    }
  }

  try {
    // Wait for the modal content to load
    await delayExecution(delay);
    const imageUrls = extractFromPage(document);
    console.log(imageUrls);
    return imageUrls;
  } catch (error) {
    console.error('Error:', error);
    return [];
  }
}
// Usage (as written, BOTH calls below execute; keep only the one you need):
// To extract image URLs from the current page/modal (uses the default 5000 ms delay):
extractImageUrls();
// To extract image URLs from a specified URL (the page is fetched and parsed; second argument is the delay in ms):
extractImageUrls('https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab', 5000);
Please note that you can run my code while the current page/modal pictures are in view...
OR
You can run remotely with a url (e.g: https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab)
you have to add the ?mid=0#lil-mediaTab on the end of the url
Hey @samjco,
This script will work if you go to the last modal of photos, then run it. Then you can rerun it on the #1 of the photos depending on how many are in the aggregate set:
// Inject jQuery, then step through the photo carousel collecting each slide's
// image URL, and finally download every collected image.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Local binding: avoids creating/overwriting a global `$`.
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay
  // can't map since there isn't a list, so just push as we find more.
  const imageList = [];
  // while there is a next button
  while (
    $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
    $(".photo-carousel-icon-wrapper .icon-reload").length
  ) {
    // Wait a little to make sure the next image source is loaded. If images are missed, increasing the timeout might help
    await delay(200);
    // Reload icon is showing: end of the carousel, break out of loop
    if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
      break;
    }
    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    // The visible slide may not expose a JPEG srcset (yet); skip it instead of throwing.
    if (srcset) {
      const srcs = srcset.split(" ");
      // second-to-last token is the URL of the last srcset candidate
      const src = srcs[srcs.length - 2];
      // just in case... make sure the src is not already in the list.
      if (src && !imageList.includes(src)) {
        imageList.push(src);
      }
    }
    // go to the next slide
    $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
  }
  try {
    // get all image blobs in parallel first before downloading for proper batching
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(responses.map((res) => res.blob()));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      console.log(i);
      const a = document.createElement("a");
      a.style.display = "none";
      const url = window.URL.createObjectURL(blobs[i]);
      a.href = url;
      a.download = i + ""; // file is named after its index, without an extension
      document.body.appendChild(a);
      a.click();
      a.remove(); // do not leave one hidden anchor per image in the page
      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // Surface fetch/blob failures instead of leaving an unhandled rejection.
    console.error("Image download failed:", err);
  }
};
document.getElementsByTagName("head")[0].appendChild(script);
I really appreciate you sharing your script with everyone, I will definitely try it out.
Cheers,
Montana
OK. I've made a chrome ext!!
https://github.com/samjco/zillow-listing-image-extractor-chrome-extension
Nice, this worked well. Thank you!
This also worked for me. So long as you scroll down in the Zillow image gallery to get all the images it will gather all the images in a zip folder for you. I targeted all the .webp images since that's the image type that Zillow requests/uses.
// Load JSZip library
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";
document.head.appendChild(script);
script.onload = function() {
  // Generate the zip as a blob and hand it to the browser as 'images.zip'.
  // Returns the promise so callers can observe failures.
  function downloadZip(zip) {
    return zip.generateAsync({type: 'blob'}).then(function(content) {
      const link = document.createElement('a');
      const url = URL.createObjectURL(content);
      link.href = url;
      link.download = 'images.zip';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      // Release the blob URL once the download has had time to start.
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    });
  }
  // Gather the .webp image URLs the page has already requested and zip them.
  function gatherAndZipImages() {
    const zip = new JSZip();
    const imgFolder = zip.folder("images");
    // Resource timing entries recorded for this page
    const requests = window.performance.getEntriesByType('resource');
    // Keep <img>-initiated .webp requests; the Set drops URLs requested more than once.
    const imageUrls = [...new Set(
      requests
        .filter((request) => request.initiatorType === 'img' && request.name.endsWith('.webp'))
        .map((request) => request.name)
    )];
    if (imageUrls.length === 0) {
      console.log('No .webp images found.');
      return;
    }
    console.log('Image URLs:', imageUrls);
    const downloadPromises = imageUrls.map((url, index) => {
      return fetch(url).then(response => response.blob()).then(blob => {
        imgFolder.file(`image_${index + 1}.webp`, blob);
      });
    });
    Promise.all(downloadPromises)
      .then(() => downloadZip(zip))
      .catch((err) => {
        // Report fetch/zip failures instead of leaving an unhandled rejection.
        console.error('Failed to download or zip images:', err);
      });
  }
  // Execute the function to gather and zip images
  gatherAndZipImages();
}
I haven't been able to get the above solutions to work but I was able to use @hzarrabi's snippet as a base for this script. Theirs should be more reliable but for some reason only a small number of the actual images were ever found in the resources... I expect this to be fairly brittle given it's relying on data attributes to select DOM elements but as of now (November 2, 2024) it works a treat. I threw in some options at the top for image format and sizes Zillow makes available as well.
const TARGET_FORMAT = "jpeg"; // Options: `jpeg` or `webp`
const TARGET_SIZE = "1536"; // Options: `1536`, `1344`, `1152`, `960`, `768`, `576`, `384`, `192`
// Load JSZip library
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";
document.head.appendChild(script);
script.onload = function() {
  // Generate the zip as a blob and hand it to the browser as 'images.zip'.
  // Returns the promise so callers can observe failures.
  function downloadZip(zip) {
    return zip.generateAsync({type: 'blob'}).then(function(content) {
      const link = document.createElement('a');
      const url = URL.createObjectURL(content);
      link.href = url;
      link.download = 'images.zip';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      // Release the blob URL once the download has had time to start.
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    });
  }
  // Function to gather and zip image URLs from "media wall"
  function gatherAndZipImages() {
    // Gather the image URLs
    const mediaWall = document.querySelector('ul[data-cy="hollywood-vertical-media-wall"]');
    if (!mediaWall) {
      console.log('Media wall not found; open the listing photo gallery before running this script.');
      return;
    }
    const sources = Array.from(mediaWall.querySelectorAll(`source[type="image/${TARGET_FORMAT}"]`));
    // srcset is in the format "<url> <size>, <url> <size>": take the last candidate,
    // trim the space left over from the comma split, and keep its URL (first token).
    // This also works when the srcset holds a single candidate.
    // The size/extension suffix is then rewritten to the requested size and format.
    // NOTE(review): with TARGET_FORMAT = "jpeg" the rewritten URL ends in `.jpeg`
    // although the pattern matches `.jpg`; reported as working, confirm if it breaks.
    const imageUrls = sources
      .filter((source) => source.srcset)
      .map((source) => {
        const lastCandidate = source.srcset.split(",").at(-1).trim();
        const candidateUrl = lastCandidate.split(/\s+/)[0];
        return candidateUrl.replaceAll(/_\d+\.(jpg|webp)/g, `_${TARGET_SIZE}.${TARGET_FORMAT}`);
      });
    if (imageUrls.length === 0) {
      console.log(`No .${TARGET_FORMAT} images found.`);
      return;
    }
    const zip = new JSZip();
    const imgFolder = zip.folder("images");
    console.log('Image URLs:', imageUrls);
    const downloadPromises = imageUrls.map((url, index) => {
      return fetch(url).then(response => response.blob()).then(blob => {
        imgFolder.file(`image_${index + 1}.${TARGET_FORMAT}`, blob);
      });
    });
    Promise.all(downloadPromises)
      .then(() => downloadZip(zip))
      .catch((err) => {
        // Report fetch/zip failures instead of leaving an unhandled rejection.
        console.error('Failed to download or zip images:', err);
      });
  }
  // Execute the function to gather and zip images
  gatherAndZipImages();
}
This latest one worked great for me! Just needed to remember to Continue in the debugger repeatedly for each image on the page. Thanks!
@jawaad-ahmad, glad it worked for ya! I completely forgot to remove that debugger
statement. I edited my above comment to remove it so continuing the debugger shouldn't be necessary with the new code
@jessereitz your script worked!!! Thank you!!!
@FredyLegacy
Worked for me today using Arc browser (based on Chromium). When you open picture from slideshow view, right-click and choose inspect, then select the Console tab, and run the code at bottom at the > and tap enter key