-
-
Save rvvvt/fb44ece1bba7a9836cc9877fe6b2df88 to your computer and use it in GitHub Desktop.
Instagram JS images scraper by tag
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Number of distinct image URLs to collect before auto-scrolling stops and downloading starts. */
const IMAGES_NUM = 250;

/** Distinct `src` values of the <img> elements seen so far (a Set, so duplicates are ignored). */
const imgSrcs = new Set();
/**
 * Pause an async function without blocking the page.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Record the `src` of every <img> currently in the document into `imgSrcs`.
 *
 * Images without a usable `src` are skipped: an empty string would still count
 * toward the collection target and would later be fetched as if it were an image.
 *
 * @returns {void}
 */
function appendImages() {
  for (const img of document.getElementsByTagName("img")) {
    if (img.src) {
      imgSrcs.add(img.src);
    }
  }
}
/**
 * One tick of the auto-scroll timer.
 *
 * While fewer than `IMAGES_NUM` sources have been collected, scroll to the
 * bottom of the page. Once the target is reached, stop the timer and start
 * downloading.
 *
 * NOTE: if the page never yields `IMAGES_NUM` images, this keeps scrolling
 * until the interval is cleared by hand.
 *
 * @returns {void}
 */
function autoScroll() {
  if (imgSrcs.size < IMAGES_NUM) {
    window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
    return;
  }

  clearInterval(autoScrollInterval);
  // downloadImages() is async; attach a handler so a failure is reported
  // instead of surfacing as an unhandled promise rejection.
  downloadImages().catch((err) => {
    console.error('Image download failed:', err);
  });
}
/**
 * Fetch `url` and expose the response body as a blob object URL.
 *
 * Despite the name, the result is a `blob:` object URL, not a `data:` URL.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {Promise<string|false>} The object URL, or `false` when the request
 *   fails or the server answers with a non-2xx status.
 */
async function toDataURL(url) {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      // fetch() only rejects on network errors; without this check an HTTP
      // error page would be saved as if it were the image.
      console.warn(`Skipping ${url}: HTTP ${response.status}`);
      return false;
    }
    const blob = await response.blob();
    return URL.createObjectURL(blob);
  } catch (err) {
    console.warn(`Skipping ${url}:`, err);
    return false;
  }
}
/**
 * Format a duration in milliseconds as `m:ss`.
 *
 * The duration is rounded to whole seconds before it is split into minutes and
 * seconds, so values just below a minute boundary carry over
 * (59600 ms -> "1:00", never "0:60").
 *
 * @param {number} millis - Duration in milliseconds.
 * @returns {string} `m:ss`, or `"--:--"` when `millis` is negative or not a finite number.
 */
function millisToMinutesAndSeconds(millis) {
  const ms = Number(millis);
  if (!Number.isFinite(ms) || ms < 0) {
    return '--:--';
  }
  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = String(totalSeconds % 60).padStart(2, '0');
  return `${minutes}:${seconds}`;
}
/**
 * Build a string made of `char` repeated `len` times.
 *
 * @param {number} len - Repeat count; missing, negative or non-numeric values give "".
 * @param {string} [char] - Text to repeat; any falsy value falls back to "x".
 * @returns {string} The repeated string.
 */
function makeStr(len, char) {
  const count = Math.max(0, Math.floor(Number(len)) || 0);
  return String(char || 'x').repeat(count);
}
/**
 * Download every collected image, one at a time, and print a progress line to
 * the console after each.
 *
 * Each source in `imgSrcs` is fetched through `toDataURL`; on success a
 * temporary `<a download>` element is clicked to save it. Sources that could
 * not be fetched are skipped but still count toward progress. Every file is
 * named after the fourth segment of `location.pathname` plus ".jpg".
 *
 * @returns {Promise<void>} Resolves after the last source has been processed.
 */
async function downloadImages() {
  const BAR_WIDTH = 20;
  // Give the browser time to start reading the blob before its URL is released.
  const REVOKE_DELAY_MS = 10000;

  const sources = Array.from(imgSrcs);
  const total = sources.length;
  const fileName = `${window.location.pathname.split('/')[3]}.jpg`;
  const startTime = Date.now();

  for (let i = 0; i < total; i++) {
    const objectUrl = await toDataURL(sources[i]);
    // Test the value itself: once assigned to `a.href`, `false` becomes the
    // string URL ".../false" and is no longer distinguishable from a success.
    if (objectUrl) {
      const a = document.createElement("a");
      a.href = objectUrl;
      a.download = fileName;
      document.body.appendChild(a);
      await sleep(100);
      a.click();
      document.body.removeChild(a);
      // Release the blob once the download has started, so memory does not
      // grow with every image.
      setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);
    }

    // ETA is based on items finished so far (never zero here), which avoids
    // the division by zero on the first pass.
    const completed = i + 1;
    const elapsed = Date.now() - startTime;
    const eta = (total - completed) * elapsed / completed;

    const filled = Math.round(completed / total * BAR_WIDTH);
    const bar = `[${makeStr(filled, '=')}>${makeStr(BAR_WIDTH - filled, ' ')}]`;

    console.clear();
    console.log(`${completed} / ${total} ${bar} ETA: ${millisToMinutesAndSeconds(eta)}s`);
  }
}
// Id of the pending debounce timer for the scroll handler below.
let isScrolling;

// Scroll events fire in bursts; restart a short timer on each one so the
// images are harvested once, 66 ms after scrolling has gone quiet.
window.addEventListener('scroll', () => {
  window.clearTimeout(isScrolling);
  isScrolling = setTimeout(() => {
    appendImages();
    console.log(imgSrcs.size);
  }, 66);
}, false);

// Declared explicitly (it was an implicit global, which throws in strict mode).
// autoScroll() reads this id to stop the timer once enough images are collected.
const autoScrollInterval = setInterval(autoScroll, 1500);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is great. The only downside is that it mostly grabs thumbnails.