-
-
Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
/**
 * NOTE: this specifically works if the house is for sale since it renders differently.
 * This will download the highest resolution available per image.
 */
/**
 * STEP 1: Make sure to *SCROLL* through all images so they appear in the DOM.
 * No need to click any images.
 */
/**
 * STEP 2: Open Dev Tools Console.
 * Copy and paste the code below.
 */
// Inject jQuery, then download every gallery image that is already present in the DOM.
// Images are saved as separate files named by their index (0, 1, 2, ...).
const script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Keep the jQuery handle local instead of creating an implicit global `$`.
  const $ = jQuery.noConflict();
  const delay = ms => new Promise(res => setTimeout(res, ms)); // promise delay

  // A srcset reads "<url> <width>, <url> <width>"; after splitting on spaces the
  // second-to-last token is the URL of the last (highest resolution) candidate.
  const imageList = $('ul.media-stream li picture source[type="image/jpeg"]').map(function () {
    const srcset = $(this).attr('srcset');
    if (!srcset) {
      return null; // jQuery's .map() leaves null/undefined results out of the list
    }
    const tokens = srcset.split(' ');
    return tokens[tokens.length - 2];
  }).toArray();

  // Fetch one image and fail loudly on an HTTP error instead of saving the error page.
  const fetchBlob = async (src) => {
    const res = await fetch(src);
    if (!res.ok) {
      throw new Error(`Failed to fetch ${src}: HTTP ${res.status}`);
    }
    return res.blob();
  };

  try {
    // get all image blobs in parallel first before downloading for proper batching
    const blobs = await Promise.all(imageList.map(fetchBlob));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before each batch of 10 downloads.
      if (i % 10 === 0) {
        console.log('1 sec delay...');
        await delay(1000);
      }
      console.log(i);
      const url = window.URL.createObjectURL(blobs[i]);
      const a = document.createElement('a');
      a.style.display = 'none';
      a.href = url;
      a.download = i + '';
      document.body.appendChild(a);
      a.click();
      setTimeout(() => {
        // Release the blob URL and take the temporary link back out of the page.
        window.URL.revokeObjectURL(url);
        a.remove();
      }, 100);
    }
  } catch (err) {
    console.error('Image download failed:', err);
  }
};
document.head.appendChild(script);
@medosf Which of the above scripts is this for?
Hi all,
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,
Had to edit a lot of it, but this version that I made now works perfectly in one try:
// Inject jQuery, step through the photo carousel collecting the highest resolution
// JPEG URL of each slide, then download every image as a file named by its index.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
  const imageList = [];

  // Fetch one image and reject on an HTTP error so an error page is never saved as a photo.
  const fetchBlob = async (src) => {
    const res = await fetch(src);
    if (!res.ok) {
      throw new Error(`Failed to fetch ${src}: HTTP ${res.status}`);
    }
    return res.blob();
  };

  try {
    // Keep going while the carousel shows a "next" arrow or a "reload" icon.
    while (
      $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
      $(".photo-carousel-icon-wrapper .icon-reload").length
    ) {
      // Give the next slide's <source> a moment to appear.
      await delay(200);
      // The reload icon marks the end of the carousel.
      if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
        break;
      }
      const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
      if (srcset) {
        // Second-to-last space-separated token = URL of the last srcset candidate.
        const srcs = srcset.split(" ");
        const src = srcs[srcs.length - 2];
        if (!imageList.includes(src)) {
          imageList.push(src);
        }
      }
      $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
    }

    const blobs = await Promise.all(imageList.map(fetchBlob));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before each batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      const url = window.URL.createObjectURL(blobs[i]);
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = `${i}`;
      document.body.appendChild(a);
      a.click();
      setTimeout(() => {
        // Release the blob URL and remove the temporary link from the page.
        window.URL.revokeObjectURL(url);
        a.remove();
      }, 100);
    }
  } catch (err) {
    console.error("Image download failed:", err);
  }
};
document.head.appendChild(script);
Just open in DevConsole, run it - and you'll see the following:
For educational use only.
So I tested your code, @Montana, and it didn't work for me.
Possibly due to the fact that Zillow changed the URL to a more dynamic one.
So I used their sister site, Trulia.com, and wrote the code below.
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,
Please run inside chrome devtool console..
/**
 * Collect the `src` of every `picture > img` inside the grid gallery
 * (`div[data-testid="grid-gallery"]`) and log the resulting list.
 *
 * Without `optionalUrl` the live page's `document` is scanned; with it, the URL is
 * fetched and its HTML is parsed with DOMParser, then scanned the same way.
 *
 * @param {?string} [optionalUrl=null] - Page to fetch and scan; falsy scans the current page.
 * @param {number} [delay=5000] - Milliseconds to wait before scanning (time for the gallery to load).
 * @returns {Promise<string[]>} The image URLs found; an empty array when the gallery
 *   container is missing or when an error was caught and logged.
 */
async function extractImageUrls(optionalUrl = null, delay = 5000) {
  const GALLERY_SELECTOR = 'div[data-testid="grid-gallery"]';

  // Return the non-empty `src` of each gallery image under `root` (a Document-like object).
  function extractFromPage(root) {
    const container = root.querySelector(GALLERY_SELECTOR);
    if (!container) {
      return [];
    }
    return Array.from(container.querySelectorAll('picture > img'))
      .map((img) => img.src)
      .filter(Boolean);
  }

  // Resolve after `ms` milliseconds.
  function delayExecution(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  // Fetch `url` and parse the response body into a detached HTML document.
  async function loadRemoteDocument(url) {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const text = await response.text();
    return new DOMParser().parseFromString(text, 'text/html');
  }

  try {
    const doc = optionalUrl ? await loadRemoteDocument(optionalUrl) : document;
    // Wait for the modal content to load
    await delayExecution(delay);
    const imageUrls = extractFromPage(doc);
    console.log(imageUrls);
    return imageUrls;
  } catch (error) {
    console.error(optionalUrl ? 'Error fetching the specified URL:' : 'Error:', error);
    return [];
  }
}
// Usage examples:
// 1. Scan the page/modal that is currently open (no URL, 5000 ms wait).
extractImageUrls(null, 5000);
// 2. Fetch a specific listing page and scan it instead.
extractImageUrls(
  'https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab',
  5000,
);
Please note that you can run my code while the current page/modal pictures are in view...
OR
You can run it remotely with a URL (e.g.: https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab)
You have to add ?mid=0#lil-mediaTab to the end of the URL.
Hey @samjco,
This script will work if you go to the last modal of photos, then run it. Then you can rerun it on the #1 of the photos depending on how many are in the aggregate set:
// Inject jQuery, walk the photo carousel slide by slide to collect image URLs,
// then download each image as a file named by its index.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Local binding: avoids creating an implicit global `$`.
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay
  const NEXT_ARROW = ".photo-carousel-icon-wrapper .icon-arrow-right";
  const RELOAD_ICON = ".photo-carousel-icon-wrapper .icon-reload";

  // Click through the carousel and return the unique image URLs seen on the way.
  const collectCarouselImages = async () => {
    // can't map since there isn't a list, so just push as we find more.
    const found = [];
    // while there is a next button
    while ($(NEXT_ARROW).length || $(RELOAD_ICON).length) {
      // Wait a little to make sure the next image source is loaded. If you get an error, increasing the timeout might help
      await delay(200);
      // Last image, break out of loop
      if ($(RELOAD_ICON).length) {
        break;
      }
      const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
      // The source may not be rendered yet; skip this slide's URL instead of throwing.
      if (srcset) {
        // Second-to-last space-separated token = URL of the last srcset candidate.
        const srcs = srcset.split(" ");
        const src = srcs[srcs.length - 2];
        // just in case... let's make sure the src is not already in the list.
        if (!found.includes(src)) {
          found.push(src);
        }
      }
      // go to the next slide
      $(NEXT_ARROW).parent().click();
    }
    return found;
  };

  // Fetch one image; reject on an HTTP error so an error page is never saved as a photo.
  const fetchBlob = async (src) => {
    const res = await fetch(src);
    if (!res.ok) {
      throw new Error(`Failed to fetch ${src}: HTTP ${res.status}`);
    }
    return res.blob();
  };

  // Trigger a browser download of `blob` under the file name `name`.
  const saveBlob = (blob, name) => {
    const url = window.URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.style.display = "none";
    a.href = url;
    a.download = name;
    document.body.appendChild(a);
    a.click();
    setTimeout(() => {
      // Release the blob URL and remove the temporary link from the page.
      window.URL.revokeObjectURL(url);
      a.remove();
    }, 100);
  };

  try {
    const imageList = await collectCarouselImages();
    // get all image blobs in parallel first before downloading for proper batching
    const blobs = await Promise.all(imageList.map(fetchBlob));
    for (let i = 0; i < blobs.length; i++) {
      // Pause before each batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      console.log(i);
      saveBlob(blobs[i], i + "");
    }
  } catch (err) {
    console.error("Image download failed:", err);
  }
};
document.head.appendChild(script);
I really appreciate you sharing your script with everyone, I will definitely try it out.
Cheers,
Montana
OK. I've made a chrome ext!!
https://github.com/samjco/zillow-listing-image-extractor-chrome-extension
Nice, this worked well. Thank you!
This also worked for me. So long as you scroll down in the Zillow image gallery to get all the images it will gather all the images in a zip folder for you. I targeted all the .webp images since that's the image type that Zillow requests/uses.
// Load JSZip library, then zip every .webp image the page has requested so far
// and download the archive as images.zip.
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";
script.onload = function() {
  // Generate the archive and trigger a browser download of it.
  function downloadZip(zip) {
    return zip.generateAsync({type: 'blob'}).then(function(content) {
      const href = URL.createObjectURL(content);
      const link = document.createElement('a');
      link.href = href;
      link.download = 'images.zip';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      // Free the archive blob once the download has had time to start.
      setTimeout(() => URL.revokeObjectURL(href), 1000);
    });
  }

  // True for <img>-initiated requests whose URL path ends in .webp.
  // Checking the path (not the whole URL) keeps matches that carry a query string or hash.
  function isWebpImageRequest(entry) {
    return entry.initiatorType === 'img' &&
      new URL(entry.name, window.location.href).pathname.endsWith('.webp');
  }

  // Function to gather and zip .webp image URLs from the network requests
  function gatherAndZipImages() {
    const zip = new JSZip();
    const imgFolder = zip.folder("images");

    // Get the page's recorded resource requests (Resource Timing entries).
    // NOTE(review): browsers cap this buffer, so on long pages older requests may be
    // missing; performance.setResourceTimingBufferSize() can raise the cap before scrolling.
    const requests = window.performance.getEntriesByType('resource');
    // A Set drops URLs that were requested more than once.
    const imageUrls = [...new Set(
      requests.filter(isWebpImageRequest).map((request) => request.name)
    )];

    if (imageUrls.length === 0) {
      console.log('No .webp images found.');
      return;
    }

    console.log('Image URLs:', imageUrls);
    const downloadPromises = imageUrls.map((url, index) => {
      return fetch(url).then(response => {
        if (!response.ok) {
          throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
        }
        return response.blob();
      }).then(blob => {
        imgFolder.file(`image_${index + 1}.webp`, blob);
      });
    });

    Promise.all(downloadPromises)
      .then(() => downloadZip(zip))
      .catch(error => console.error('Failed to build images.zip:', error));
  }

  // Execute the function to gather and zip images
  gatherAndZipImages();
};
// Attach the element only after the load handler is in place.
document.head.appendChild(script);
I haven't been able to get the above solutions to work but I was able to use @hzarrabi's snippet as a base for this script. Theirs should be more reliable but for some reason only a small number of the actual images were ever found in the resources... I expect this to be fairly brittle given it's relying on data attributes to select DOM elements but as of now (November 2, 2024) it works a treat. I threw in some options at the top for image format and sizes Zillow makes available as well.
const TARGET_FORMAT = "jpeg"; // Options: `jpeg` or `webp`
const TARGET_SIZE = "1536"; // Options: `1536`, `1344`, `1152`, `960`, `768`, `576`, `384`, `192`

// Load JSZip library, then zip every image listed in the "media wall" at the
// requested format/size and download the archive as images.zip.
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";
script.onload = function() {
  // Generate the archive and trigger a browser download of it.
  function downloadZip(zip) {
    return zip.generateAsync({type: 'blob'}).then(function(content) {
      const href = URL.createObjectURL(content);
      const link = document.createElement('a');
      link.href = href;
      link.download = 'images.zip';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      // Free the archive blob once the download has had time to start.
      setTimeout(() => URL.revokeObjectURL(href), 1000);
    });
  }

  // srcset is in the format "<url> <size>, <url> <size>": take the last candidate and
  // return its URL. Trimming first means a single-candidate srcset (no leading space)
  // still yields the URL rather than the size descriptor.
  function lastUrlFromSrcset(srcset) {
    return srcset.split(",").at(-1).trim().split(/\s+/)[0];
  }

  // Rewrite the "_<size>.<ext>" suffix so the URL points at TARGET_SIZE / TARGET_FORMAT.
  // Which candidate we started from does not matter much, since the size is replaced anyway.
  function toTargetVariant(url) {
    return url.replaceAll(/_\d+\.(jpg|webp)/g, `_${TARGET_SIZE}.${TARGET_FORMAT}`);
  }

  // Function to gather and zip image URLs from "media wall"
  function gatherAndZipImages() {
    const mediaWall = document.querySelector('ul[data-cy="hollywood-vertical-media-wall"]');
    if (!mediaWall) {
      console.log('Media wall not found - open the listing photo gallery and try again.');
      return;
    }

    // Gather the image URLs
    const sources = Array.from(mediaWall.querySelectorAll(`source[type="image/${TARGET_FORMAT}"]`));
    const imageUrls = sources
      .map(source => lastUrlFromSrcset(source.srcset))
      .filter(Boolean)
      .map(toTargetVariant);

    if (imageUrls.length === 0) {
      console.log(`No .${TARGET_FORMAT} images found.`);
      return;
    }

    const zip = new JSZip();
    const imgFolder = zip.folder("images");
    console.log('Image URLs:', imageUrls);
    const downloadPromises = imageUrls.map((url, index) => {
      return fetch(url).then(response => {
        if (!response.ok) {
          throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
        }
        return response.blob();
      }).then(blob => {
        imgFolder.file(`image_${index + 1}.${TARGET_FORMAT}`, blob);
      });
    });

    Promise.all(downloadPromises)
      .then(() => downloadZip(zip))
      .catch(error => console.error('Failed to build images.zip:', error));
  }

  // Execute the function to gather and zip images
  gatherAndZipImages();
};
// Attach the element only after the load handler is in place.
document.head.appendChild(script);
This latest one worked great for me! Just needed to remember to Continue in the debugger repeatedly for each image on the page. Thanks!
@jawaad-ahmad, glad it worked for ya! I completely forgot to remove that debugger
statement. I edited my above comment to remove it, so continuing the debugger shouldn't be necessary with the new code.
@jessereitz your script worked!!! Thank you!!!
This does not work for me. I do not see any pop-up windows. Also, if the pictures are being downloaded, did the code mention where the pictures will be stored at?