Last active
October 7, 2022 14:57
-
-
Save JohannesBauer97/87c95bfdbd44efc547fc0518f5114643 to your computer and use it in GitHub Desktop.
How to scrape full resolution images from Google in 2022
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Collect every image-result tile currently rendered on the page.
const imageResults = document.querySelectorAll('[jsname="N9Xkfe"]');
console.log('Number of image results:', imageResults.length);

// Filled asynchronously: each time a tile's link receives its href, the
// value of that href's `imgurl` query parameter is appended here.
const imageSourceUrls = [];

// How long to wait between attempts to find a tile's link element, and how
// many attempts to make before giving up on that tile. Tiles that never get
// a second child would otherwise keep a timer alive indefinitely.
const LINK_RETRY_DELAY_MS = 500;
const LINK_MAX_RETRIES = 20;

// Attach one attribute observer per tile.
imageResults.forEach((imageResult) => {
  let retriesLeft = LINK_MAX_RETRIES;

  /**
   * Attaches a MutationObserver to the tile's second child (the element
   * whose href carries the source URL). If that child does not exist yet,
   * schedules another attempt until the retry budget is exhausted.
   */
  function addObserverIfDesiredNodeAvailable() {
    // Re-read the child on every attempt; a value captured once outside
    // this function would never reflect a node that is added later.
    const imageResultLink = imageResult.children[1];

    if (!imageResultLink) {
      if (retriesLeft > 0) {
        retriesLeft -= 1;
        window.setTimeout(addObserverIfDesiredNodeAvailable, LINK_RETRY_DELAY_MS);
      }
      return;
    }

    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        if (mutation.type !== 'attributes' || mutation.attributeName !== 'href') {
          return;
        }

        const hrefValue = mutation.target.getAttribute('href');
        if (hrefValue === null) {
          // The href attribute was removed rather than set; nothing to read.
          return;
        }

        // The base is only there so relative hrefs parse; its host is never used.
        const url = new URL(hrefValue, 'https://example.org');
        const imageSourceUrl = url.searchParams.get('imgurl');

        // Ignore hrefs without an imgurl parameter and URLs already recorded.
        if (imageSourceUrl === null || imageSourceUrls.includes(imageSourceUrl)) {
          return;
        }

        imageSourceUrls.push(imageSourceUrl);
        console.log('Image URL:', imageSourceUrl);
      });
    });

    // Only href changes are of interest, so ask for nothing else.
    observer.observe(imageResultLink, {
      attributes: true,
      attributeFilter: ['href'],
    });
  }

  addObserverIfDesiredNodeAvailable();
});
// Click the results one by one, at most one click per second, so that each
// tile's href has time to be populated before the next click.
let i = 0;
const interval = setInterval(() => {
  // Advance past entries that are not clickable image tiles (for example
  // "related searches") within the same tick, so they do not each cost a
  // full second of waiting.
  while (i < imageResults.length) {
    const candidate = imageResults[i].children[1];
    if (candidate && candidate.getAttribute("role") === "button") {
      break;
    }
    console.log('skip');
    i++;
  }

  // Nothing left to click: stop the timer and print what was collected.
  // This runs on the tick after the last click, leaving that click's href
  // update a full interval to arrive.
  if (i >= imageResults.length) {
    console.log("Done clicking");
    clearInterval(interval);
    console.log("LIST OF ALL IMAGES", imageSourceUrls);
    return;
  }

  imageResults[i].children[1].click();
  // Report one-based progress so the final click reads N/N.
  console.log(`Click ${i + 1}/${imageResults.length}`);
  i++;
}, 1000);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment