Skip to content

Instantly share code, notes, and snippets.

@bryant988
Last active July 21, 2024 06:49
Show Gist options
  • Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
Save bryant988/9510cff838d86dcefa3b9ea3835b8552 to your computer and use it in GitHub Desktop.
Zillow Image Downloader
/**
* NOTE: this specifically works if the house is for sale since it renders differently.
* This will download the highest resolution available per image.
*/
/**
* STEP 1: Make sure to *SCROLL* through all images so they appear on DOM.
* No need to click any images.
*/
/**
* STEP 2: Open Dev Tools Console.
* Copy and paste code below
*/
/**
 * Loads jQuery, collects one JPEG URL per image from the `ul.media-stream`
 * list (the second-to-last token of each srcset, which the original author
 * describes as the highest resolution), fetches them all in parallel, and then
 * triggers one browser download per image. Files are named by index: "0", "1", ...
 */
const script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";
script.onload = async () => {
  // Keep the jQuery handle local instead of assigning an undeclared global `$`.
  const $ = jQuery.noConflict();

  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay

  // Fetch one image. Resolves to null (with a warning) instead of rejecting, so
  // one bad URL neither aborts the batch nor gets saved as a bogus "image".
  const fetchBlob = async (src) => {
    try {
      const res = await fetch(src);
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      return await res.blob();
    } catch (err) {
      console.warn(`Skipping ${src}:`, err);
      return null;
    }
  };

  const imageList = $('ul.media-stream li picture source[type="image/jpeg"]')
    .map(function () {
      const srcset = $(this).attr('srcset');
      if (!srcset) {
        return null; // jQuery's .map() leaves out null/undefined results
      }
      const tokens = srcset.split(' ');
      // Second-to-last token; assumes the srcset ends with "<url> <descriptor>".
      return tokens[tokens.length - 2];
    })
    .toArray();

  if (imageList.length === 0) {
    console.warn('No images found. Scroll through all images first so they appear in the DOM.');
    return;
  }

  // get all image blobs in parallel first before downloading for proper batching
  const blobs = await Promise.all(imageList.map(fetchBlob));

  for (let i = 0; i < blobs.length; i++) {
    // Pause before every batch of 10 downloads.
    if (i % 10 === 0) {
      console.log('1 sec delay...');
      await delay(1000);
    }

    const blob = blobs[i];
    if (!blob) {
      continue; // fetch failed; already reported by fetchBlob
    }

    console.log(i);
    const url = window.URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.style.display = 'none';
    a.href = url;
    a.download = `${i}`;
    document.body.appendChild(a);
    a.click();

    // Release the object URL and drop the temporary link once the click has been handled.
    setTimeout(() => {
      window.URL.revokeObjectURL(url);
      a.remove();
    }, 100);
  }
};
document.getElementsByTagName('head')[0].appendChild(script);
@wtcipher
Copy link

This does not work for me. I do not see any pop-up windows. Also, if the pictures are being downloaded, does the code say where they will be stored?

@ANONDOTCDOT
Copy link

@medosf Which above script is this for?

@Montana
Copy link

Montana commented Jun 6, 2024

Hi all,

@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,

Had to edit a lot of it, but this version that I made now works perfectly in one try:

/**
 * Loads jQuery, steps through the photo carousel by clicking its "next" arrow,
 * records one JPEG URL per slide (the second-to-last token of the visible
 * slide's srcset), fetches all of them in parallel, and then triggers one
 * browser download per image. Files are named by index: "0", "1", ...
 */
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";

script.onload = async () => {
  const $ = jQuery.noConflict();

  const delay = (ms) => new Promise((res) => setTimeout(res, ms));

  // Fetch one image. Resolves to null (with a warning) instead of rejecting, so
  // one bad URL neither aborts the batch nor gets saved as a bogus "image".
  const fetchBlob = async (src) => {
    try {
      const res = await fetch(src);
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      return await res.blob();
    } catch (err) {
      console.warn(`Skipping ${src}:`, err);
      return null;
    }
  };

  const imageList = [];

  // Keep going while the carousel shows either a "next" arrow or a "reload" icon.
  while (
    $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
    $(".photo-carousel-icon-wrapper .icon-reload").length
  ) {
    // Give the next slide's image source time to load.
    await delay(200);

    // NOTE(review): the loop exits as soon as the reload icon is present, before
    // reading the slide shown at that moment - confirm that slide is not a photo.
    if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
      break;
    }

    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    if (srcset) {
      const srcs = srcset.split(" ");
      // Second-to-last token; assumes the srcset ends with "<url> <descriptor>".
      const src = srcs[srcs.length - 2];

      if (!imageList.includes(src)) {
        imageList.push(src);
      }
    }

    // Advance to the next slide.
    $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
  }

  if (imageList.length === 0) {
    console.warn("No images found in the photo carousel.");
    return;
  }

  // Fetch every image in parallel before starting the batched downloads.
  const blobs = await Promise.all(imageList.map(fetchBlob));

  for (let i = 0; i < blobs.length; i++) {
    // Pause before every batch of 10 downloads.
    if (i % 10 === 0) {
      console.log("1 sec delay...");
      await delay(1000);
    }

    const blob = blobs[i];
    if (!blob) {
      continue; // fetch failed; already reported by fetchBlob
    }

    const a = document.createElement("a");
    a.style.display = "none";

    const url = window.URL.createObjectURL(blob);
    a.href = url;
    a.download = `${i}`;
    document.body.appendChild(a);
    a.click();

    // Release the object URL and drop the temporary link once the click has been handled.
    setTimeout(() => {
      window.URL.revokeObjectURL(url);
      a.remove();
    }, 100);
  }
};

document.head.appendChild(script);

Just open in DevConsole, run it - and you'll see the following:

Screenshot 2024-06-06 at 11 35 30 AM

For educational use only.

@samjco
Copy link

samjco commented Jul 12, 2024

So I tested your code, @Montana, and it didn't work for me.
Possibly due to the fact that Zillow changed the URL to a more dynamic one.
So I used their sister site, Trulia.com, and wrote the code below.
@wtcipher, @ANONDOTCDOT, @eek2022, @medosf,

Please run it inside the Chrome DevTools console.

/**
 * Collects the `src` of every `picture > img` inside the
 * `div[data-testid="grid-gallery"]` container, logs the resulting list and
 * returns it.
 *
 * @param {?string} [optionalUrl=null] - When given, this URL is fetched and its
 *   HTML is parsed; otherwise the current page's `document` is inspected.
 * @param {number} [delay=5000] - Milliseconds to wait before reading the current
 *   page, giving its gallery time to render. Not applied when `optionalUrl`
 *   is given: a DOMParser document is already fully parsed and does not run
 *   scripts, so waiting cannot change its content.
 * @returns {Promise<string[]>} The image URLs found (empty when the container is
 *   missing). Failures are logged with `console.error` and yield an empty array.
 */
async function extractImageUrls(optionalUrl = null, delay = 5000) {
    // Pull the non-empty image sources out of the gallery container of `root`.
    function extractFromPage(root) {
        const container = root.querySelector('div[data-testid="grid-gallery"]');
        if (!container) {
            return [];
        }
        return Array.from(container.querySelectorAll('picture > img'), (img) => img.src)
            .filter(Boolean);
    }

    function delayExecution(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }

    // Download `url` and parse the response body as an HTML document.
    async function fetchDocument(url) {
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const text = await response.text();
        return new DOMParser().parseFromString(text, 'text/html');
    }

    try {
        let root;
        if (optionalUrl) {
            root = await fetchDocument(optionalUrl);
        } else {
            // Wait for the modal content to load
            await delayExecution(delay);
            root = document;
        }

        const imageUrls = extractFromPage(root);
        console.log(imageUrls);
        return imageUrls;
    } catch (error) {
        console.error(optionalUrl ? 'Error fetching the specified URL:' : 'Error:', error);
        return [];
    }
}

// Usage:
// Each call below is asynchronous and starts immediately, logging its own array
// of URLs; keep only the call you need.
// To extract image URLs from the current page/modal (no arguments: the current
// document is read after the default 5000 ms delay):
extractImageUrls();

// To extract image URLs from a specified URL (first argument: the page to fetch;
// second argument: the `delay` value in milliseconds):
extractImageUrls('https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab', 5000);

Note that you can run my code while the current page/modal's pictures are in view,
OR
you can run it remotely with a URL (e.g. https://www.trulia.com/home/14308-windsor-dr-leawood-ks-66224-75683935?mid=0#lil-mediaTab).
You have to add ?mid=0#lil-mediaTab to the end of the URL.

@Montana
Copy link

Montana commented Jul 12, 2024

Hey @samjco,

This script will work if you go to the last modal of photos and then run it. You can then rerun it starting from photo #1, depending on how many photos are in the full set:

/**
 * Loads jQuery, steps through the photo carousel by clicking its "next" arrow,
 * records one JPEG URL per slide (the second-to-last token of the visible
 * slide's srcset), fetches all of them in parallel, and then triggers one
 * browser download per image. Files are named by index: "0", "1", ...
 */
const script = document.createElement("script");
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js";

script.onload = async () => {
  // Keep the jQuery handle local instead of assigning an undeclared global `$`.
  const $ = jQuery.noConflict();

  const delay = (ms) => new Promise((res) => setTimeout(res, ms)); // promise delay

  // Fetch one image. Resolves to null (with a warning) instead of rejecting, so
  // one bad URL neither aborts the batch nor gets saved as a bogus "image".
  const fetchBlob = async (src) => {
    try {
      const res = await fetch(src);
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      return await res.blob();
    } catch (err) {
      console.warn(`Skipping ${src}:`, err);
      return null;
    }
  };

  // can't map since there isn't a list, so just push as we find more.
  const imageList = [];

  // while there is a next button (or the reload icon)
  while (
    $(".photo-carousel-icon-wrapper .icon-arrow-right").length ||
    $(".photo-carousel-icon-wrapper .icon-reload").length
  ) {
    // Wait a little to make sure the next image source is loaded. If you get an error, increasing the timeout might help
    await delay(200);

    // Reload icon present: break out of the loop.
    // NOTE(review): this exits before reading the slide shown at that moment -
    // confirm that slide is not a photo.
    if ($(".photo-carousel-icon-wrapper .icon-reload").length) {
      break;
    }

    // A slide without a JPEG <source> has no srcset; skip it instead of throwing.
    const srcset = $('.hdp-gallery-image-content .image:visible source[type="image/jpeg"]').attr("srcset");
    if (srcset) {
      const srcs = srcset.split(" ");
      // Second-to-last token; assumes the srcset ends with "<url> <descriptor>".
      const src = srcs[srcs.length - 2];
      // just in case... make sure the src is not already in the list.
      if (!imageList.includes(src)) {
        imageList.push(src);
      }
    }

    // go to the next slide
    $(".photo-carousel-icon-wrapper .icon-arrow-right").parent().click();
  }

  if (imageList.length === 0) {
    console.warn("No images found in the photo carousel.");
    return;
  }

  // get all image blobs in parallel first before downloading for proper batching
  const blobs = await Promise.all(imageList.map(fetchBlob));

  for (let i = 0; i < blobs.length; i++) {
    // Pause before every batch of 10 downloads.
    if (i % 10 === 0) {
      console.log("1 sec delay...");
      await delay(1000);
    }

    const blob = blobs[i];
    if (!blob) {
      continue; // fetch failed; already reported by fetchBlob
    }

    const a = document.createElement("a");
    a.style.display = "none";
    console.log(i);

    const url = window.URL.createObjectURL(blob);
    a.href = url;
    a.download = `${i}`;
    document.body.appendChild(a);
    a.click();

    // Release the object URL and drop the temporary link once the click has been handled.
    setTimeout(() => {
      window.URL.revokeObjectURL(url);
      a.remove();
    }, 100);
  }
};

document.getElementsByTagName("head")[0].appendChild(script);

I really appreciate you sharing your script with everyone, I will definitely try it out.

Cheers,
Montana

@samjco
Copy link

samjco commented Jul 15, 2024

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment