Skip to content

Instantly share code, notes, and snippets.

@bryant988
Last active November 20, 2024 23:18
Show Gist options
  • Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
Zillow Image Downloader
/**
* NOTE: this specifically works if the house is for sale since it renders differently.
* This will download the highest resolution available per image.
*/
/**
* STEP 1: Make sure to *SCROLL* through all images so they appear on DOM.
* No need to click any images.
*/
/**
* STEP 2: Open Dev Tools Console.
* Copy and paste code below
*/
// Loads jQuery, collects one JPEG URL per gallery image currently in the DOM
// (the last candidate of each srcset, which the original author describes as
// the highest resolution), then downloads the images in batches of 10.
const script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onerror = () => {
  console.error('Could not load jQuery from ' + script.src);
};
script.onload = async () => {
  // Keep the jQuery handle local instead of assigning to an implicit global `$`.
  const $ = jQuery.noConflict();
  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay

  const imageList = $('ul.media-stream li picture source[type="image/jpeg"]')
    .map(function () {
      const srcset = $(this).attr('srcset');
      if (!srcset) {
        return null; // jQuery's .map() drops null/undefined results
      }
      // The second-to-last space-separated token is the URL of the last srcset candidate.
      const parts = srcset.split(' ');
      return parts[parts.length - 2];
    })
    .toArray();

  if (imageList.length === 0) {
    console.warn('No images found. Scroll through the gallery so the images are in the DOM, then run again.');
    return;
  }

  try {
    // get all image blobs in parallel first before downloading for proper batching
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(
      responses.map((res) => {
        // Without this check an HTTP error page would be saved as if it were an image.
        if (!res.ok) {
          throw new Error(`Failed to fetch ${res.url}: HTTP ${res.status}`);
        }
        return res.blob();
      })
    );

    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log('1 sec delay...');
        await delay(1000);
      }
      console.log(i);

      const url = window.URL.createObjectURL(blobs[i]);
      const a = document.createElement('a');
      a.style.display = 'none';
      a.href = url;
      a.download = i + '';
      document.body.appendChild(a);
      a.click();
      // The anchor is only needed for the click; do not leave one per image in the DOM.
      a.remove();

      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    console.error('Image download failed:', err);
  }
};
document.getElementsByTagName('head')[0].appendChild(script);
@wtcipher
Copy link

This does not work for me. I do not see any pop-up windows. Also, if the pictures are being downloaded, did the code mention where the pictures will be stored at?

@ANONDOTCDOT
Copy link

@medosf Which above script is this for?

@Montana
Copy link

Montana commented Jun 6, 2024

Hi all,

@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,

Had to edit a lot of it, but this version that I made now works perfectly in one try:

// Loads jQuery, steps through the photo carousel collecting one JPEG URL per
// slide, then downloads the collected images in batches of 10.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onerror = () => {
  console.error(`Could not load jQuery from ${script.src}`);
};

script.onload = async () => {
  const $ = jQuery.noConflict();

  const delay = (ms) => new Promise((res) => setTimeout(res, ms));

  const NEXT_ICON = ".photo-carousel-icon-wrapper .icon-arrow-right";
  const RELOAD_ICON = ".photo-carousel-icon-wrapper .icon-reload";

  const imageList = [];

  // Keep advancing while the carousel shows either a "next" or a "reload" icon.
  while ($(NEXT_ICON).length || $(RELOAD_ICON).length) {
    // Give the next slide's image source time to load.
    await delay(200);

    // The reload icon is treated as the end of the carousel.
    if ($(RELOAD_ICON).length) {
      break;
    }

    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    if (srcset) {
      // The second-to-last space-separated token is the URL of the last srcset candidate.
      const srcs = srcset.split(" ");
      const src = srcs[srcs.length - 2];

      if (!imageList.includes(src)) {
        imageList.push(src);
      }
    }

    $(NEXT_ICON).parent().click();
  }

  try {
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(
      responses.map((res) => {
        // Without this check an HTTP error page would be saved as if it were an image.
        if (!res.ok) {
          throw new Error(`Failed to fetch ${res.url}: HTTP ${res.status}`);
        }
        return res.blob();
      })
    );

    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }

      const url = window.URL.createObjectURL(blobs[i]);
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = `${i}`;
      document.body.appendChild(a);
      a.click();
      // The anchor is only needed for the click; do not leave one per image in the DOM.
      a.remove();

      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // An async onload handler's rejection would otherwise go unhandled.
    console.error("Image download failed:", err);
  }
};

document.head.appendChild(script);

Just open in DevConsole, run it - and you'll see the following:

Screenshot 2024-06-06 at 11 35 30 AM

For educational use only.

@samjco
Copy link

samjco commented Jul 12, 2024

So I tested your code, @Montana, and it didn't work for me.
Possibly due to the fact that Zillow changed the URL to a more dynamic one.
So I used their sister site, Trulia.com, and wrote the code below.
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,

Please run inside chrome devtool console..

/**
 * Collect the `src` of every `picture > img` inside the
 * `div[data-testid="grid-gallery"]` container, log the list, and return it.
 *
 * @param {?string} [optionalUrl=null] - When given, the page at this URL is
 *   fetched and parsed, and the gallery is read from the parsed document.
 *   Otherwise the gallery is read from the current `document`.
 * @param {number} [delay=5000] - Milliseconds to wait before reading the gallery.
 * @returns {Promise<string[]>} The image URLs found; an empty array when the
 *   gallery container is missing or an error occurred (the error is logged).
 */
async function extractImageUrls(optionalUrl = null, delay = 5000) {
    // Read the non-empty `src` of each gallery image from the given document.
    const extractFromPage = (doc) => {
        const container = doc.querySelector('div[data-testid="grid-gallery"]');
        if (!container) {
            return [];
        }
        return Array.from(container.querySelectorAll('picture > img'))
            .filter((img) => img.src)
            .map((img) => img.src);
    };

    const delayExecution = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    if (optionalUrl) {
        try {
            const response = await fetch(optionalUrl);
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }
            const text = await response.text();
            const doc = new DOMParser().parseFromString(text, 'text/html');

            // NOTE(review): a DOMParser document does not run the page's scripts, so
            // this wait presumably cannot make more images appear; kept to preserve
            // the original timing.
            await delayExecution(delay);

            const imageUrls = extractFromPage(doc);
            console.log(imageUrls);
            return imageUrls;
        } catch (error) {
            console.error('Error fetching the specified URL:', error);
            return [];
        }
    }

    try {
        // Wait for the modal content to load
        await delayExecution(delay);

        const imageUrls = extractFromPage(document);
        console.log(imageUrls);
        return imageUrls;
    } catch (error) {
        console.error('Error:', error);
        return [];
    }
}

// Usage:
// NOTE: the two calls below are alternatives. Pasted as-is, both of them run,
// and neither returned promise is awaited.
// To extract image URLs from the current page/modal (uses the default 5000 ms delay):
extractImageUrls();

// To extract image URLs from a specified URL (second argument is the delay in ms):
extractImageUrls('https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab', 5000);

Note that you can run my code while the pictures of the current page/modal are in view,
OR
you can run it remotely with a URL (e.g., https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab).
In that case you have to add ?mid=0#lil-mediaTab to the end of the URL.

@Montana
Copy link

Montana commented Jul 12, 2024

Hey @samjco,

This script will work if you go to the last modal of photos, then run it. Then you can rerun it on the #1 of the photos depending on how many are in the aggregate set:

// Loads jQuery, steps through the photo carousel collecting one JPEG URL per
// slide, then downloads the collected images in batches of 10.
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onerror = () => {
  console.error(`Could not load jQuery from ${script.src}`);
};

script.onload = async () => {
  // Keep the jQuery handle local instead of assigning to an implicit global `$`.
  const $ = jQuery.noConflict();

  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay

  const nextIcon = ".photo-carousel-icon-wrapper .icon-arrow-right";
  const reloadIcon = ".photo-carousel-icon-wrapper .icon-reload";

  // can't map since there isn't a list, so collect as we find more.
  // A Set keeps insertion order and ignores URLs that were already seen.
  const seen = new Set();

  // while there is a next button
  while ($(nextIcon).length || $(reloadIcon).length) {
    // Wait a little to make sure the next image source is loaded. If images are missed, increasing the timeout might help
    await delay(200);
    // Last image, break out of loop
    if ($(reloadIcon).length) {
      break;
    }

    // .attr() returns undefined when no visible source matches; skip the slide instead of throwing.
    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    if (srcset) {
      // The second-to-last space-separated token is the URL of the last srcset candidate.
      const srcs = srcset.split(" ");
      seen.add(srcs[srcs.length - 2]);
    }

    // go to the next slide
    $(nextIcon).parent().click();
  }

  const imageList = [...seen];

  try {
    // get all image blobs in parallel first before downloading for proper batching
    const responses = await Promise.all(imageList.map((src) => fetch(src)));
    const blobs = await Promise.all(
      responses.map((res) => {
        // Without this check an HTTP error page would be saved as if it were an image.
        if (!res.ok) {
          throw new Error(`Failed to fetch ${res.url}: HTTP ${res.status}`);
        }
        return res.blob();
      })
    );

    for (let i = 0; i < blobs.length; i++) {
      // Pause before every batch of 10 downloads.
      if (i % 10 === 0) {
        console.log("1 sec delay...");
        await delay(1000);
      }
      console.log(i);

      const url = window.URL.createObjectURL(blobs[i]);
      const a = document.createElement("a");
      a.style.display = "none";
      a.href = url;
      a.download = i + "";
      document.body.appendChild(a);
      a.click();
      // The anchor is only needed for the click; do not leave one per image in the DOM.
      a.remove();

      setTimeout(() => {
        window.URL.revokeObjectURL(url);
      }, 100);
    }
  } catch (err) {
    // An async onload handler's rejection would otherwise go unhandled.
    console.error("Image download failed:", err);
  }
};

document.getElementsByTagName("head")[0].appendChild(script);

I really appreciate you sharing your script with everyone, I will definitely try it out.

Cheers,
Montana

@samjco
Copy link

samjco commented Jul 15, 2024

@jo1o3o
Copy link

jo1o3o commented Aug 7, 2024

Nice, this worked well. Thank you!

@hzarrabi
Copy link

hzarrabi commented Aug 8, 2024

This also worked for me. So long as you scroll down in the Zillow image gallery to get all the images it will gather all the images in a zip folder for you. I targeted all the .webp images since that's the image type that Zillow requests/uses.

// Load JSZip library, then zip every .webp image the page has requested so far.
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";

script.onerror = function() {
    console.error('Failed to load JSZip from ' + script.src);
};

script.onload = function() {
    // Generate the zip as a blob and trigger a download named images.zip.
    // Returns the promise so the caller can handle failures.
    function downloadZip(zip) {
        return zip.generateAsync({type: 'blob'}).then(function(content) {
            const objectUrl = URL.createObjectURL(content);
            const link = document.createElement('a');
            link.href = objectUrl;
            link.download = 'images.zip';
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
            // Release the blob once the download has had time to start.
            setTimeout(function() {
                URL.revokeObjectURL(objectUrl);
            }, 1000);
        });
    }

    // True when the resource URL's path ends in .webp (query strings are ignored).
    function isWebpUrl(resourceUrl) {
        try {
            return new URL(resourceUrl).pathname.endsWith('.webp');
        } catch (err) {
            return resourceUrl.endsWith('.webp');
        }
    }

    // Function to gather and zip .webp image URLs from the network requests
    function gatherAndZipImages() {
        // Resource timing entries recorded by the browser for this page
        const requests = window.performance.getEntriesByType('resource');

        // A Set removes URLs that were requested more than once.
        const imageUrls = [...new Set(
            requests
                .filter((request) => request.initiatorType === 'img' && isWebpUrl(request.name))
                .map((request) => request.name)
        )];

        if (imageUrls.length === 0) {
            console.log('No .webp images found.');
            return;
        }

        console.log('Image URLs:', imageUrls);
        const zip = new JSZip();
        const imgFolder = zip.folder("images");

        const downloadPromises = imageUrls.map((url, index) => {
            return fetch(url).then(response => {
                // Without this check an HTTP error page would be zipped as an image.
                if (!response.ok) {
                    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
                }
                return response.blob();
            }).then(blob => {
                imgFolder.file(`image_${index + 1}.webp`, blob);
            });
        });

        Promise.all(downloadPromises)
            .then(() => downloadZip(zip))
            .catch((err) => {
                console.error('Failed to build images.zip:', err);
            });
    }

    // Execute the function to gather and zip images
    gatherAndZipImages();
};

// Attach the handlers before inserting the script element.
document.head.appendChild(script);

@jessereitz
Copy link

jessereitz commented Nov 2, 2024

I haven't been able to get the above solutions to work but I was able to use @hzarrabi's snippet as a base for this script. Theirs should be more reliable but for some reason only a small number of the actual images were ever found in the resources... I expect this to be fairly brittle given it's relying on data attributes to select DOM elements but as of now (November 2, 2024) it works a treat. I threw in some options at the top for image format and sizes Zillow makes available as well.

const TARGET_FORMAT = "jpeg";  // Options: `jpeg` or `webp`
const TARGET_SIZE = "1536";  // Options: `1536`, `1344`, `1152`, `960`, `768`, `576`, `384`, `192`

// Load JSZip library, then zip the images listed in the page's "media wall".
const script = document.createElement('script');
script.src = "https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js";

script.onerror = function() {
    console.error('Failed to load JSZip from ' + script.src);
};

script.onload = function() {
    // Generate the zip as a blob and trigger a download named images.zip.
    // Returns the promise so the caller can handle failures.
    function downloadZip(zip) {
        return zip.generateAsync({type: 'blob'}).then(function(content) {
            const objectUrl = URL.createObjectURL(content);
            const link = document.createElement('a');
            link.href = objectUrl;
            link.download = 'images.zip';
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
            // Release the blob once the download has had time to start.
            setTimeout(function() {
                URL.revokeObjectURL(objectUrl);
            }, 1000);
        });
    }

    // srcset is in the format "<url> <size>, <url> <size>". Take the URL of the
    // last candidate and rewrite its "_<digits>.<ext>" suffix to the target size and format.
    // Trimming first means a single-candidate srcset (no leading space) still
    // yields the URL rather than the size descriptor.
    function targetUrlFromSrcset(srcset) {
        const lastCandidate = srcset.split(",").at(-1).trim();
        const url = lastCandidate.split(/\s+/)[0];
        return url.replace(/_\d+\.(jpe?g|webp)/g, `_${TARGET_SIZE}.${TARGET_FORMAT}`);
    }

    // Function to gather and zip image URLs from "media wall"
    function gatherAndZipImages() {
        // Gather the image URLs
        const mediaWall = document.querySelector('ul[data-cy="hollywood-vertical-media-wall"]');
        if (!mediaWall) {
            console.error('Media wall not found. Open the photo gallery and run the script again.');
            return;
        }

        const imageUrls = Array.from(mediaWall.querySelectorAll(`source[type="image/${TARGET_FORMAT}"]`))
            .filter((source) => source.srcset)
            .map((source) => targetUrlFromSrcset(source.srcset));

        if (imageUrls.length === 0) {
            console.log(`No .${TARGET_FORMAT} images found.`);
            return;
        }

        console.log('Image URLs:', imageUrls);
        const zip = new JSZip();
        const imgFolder = zip.folder("images");

        const downloadPromises = imageUrls.map((url, index) => {
            return fetch(url).then(response => {
                // Without this check an HTTP error page would be zipped as an image.
                if (!response.ok) {
                    throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
                }
                return response.blob();
            }).then(blob => {
                imgFolder.file(`image_${index + 1}.${TARGET_FORMAT}`, blob);
            });
        });

        Promise.all(downloadPromises)
            .then(() => downloadZip(zip))
            .catch((err) => {
                console.error('Failed to build images.zip:', err);
            });
    }

    // Execute the function to gather and zip images
    gatherAndZipImages();
};

// Attach the handlers before inserting the script element.
document.head.appendChild(script);

@jawaad-ahmad
Copy link

This latest one worked great for me! Just needed to remember to Continue in the debugger repeatedly for each image on the page. Thanks!

@jessereitz
Copy link

@jawaad-ahmad, glad it worked for ya! I completely forgot to remove that debugger statement. I edited my above comment to remove it, so continuing the debugger shouldn't be necessary with the new code.

@macstainless
Copy link

@jessereitz your script worked!!! Thank you!!!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment