Skip to content

Instantly share code, notes, and snippets.

@matthewpizza
Last active December 18, 2017 21:36
Show Gist options
  • Save matthewpizza/3bf562a6257eb1cbea625f87062fb699 to your computer and use it in GitHub Desktop.
Save matthewpizza/3bf562a6257eb1cbea625f87062fb699 to your computer and use it in GitHub Desktop.
Trying to back up my Instagram media, because the sandboxed API's limitations only let you fetch the 20 most recent items.
// Step 1: Scroll down until every item has been loaded into the page.
// Step 2: Paste the following into the browser console.
// TODO: The class names and DOM structure used below are brittle and will probably change ¯\_(ツ)_/¯
const items = document.querySelectorAll('._mck9w._gvoze._f2mse');
const media = [];
for (const tile of Array.from(items)) {
  // First child of the tile; its `href` is recorded, so presumably the permalink anchor.
  const link = tile.childNodes[0];

  // Best-effort probe for a "Video" label. Tiles without that node path make
  // the lookup throw, in which case the tile is treated as a non-video.
  let is_video = false;
  try {
    const label = link.childNodes[1].childNodes[0].childNodes[0];
    is_video = 'Video' === label.innerText;
  } catch (e) {}

  // Node whose `src` is recorded (presumably the thumbnail image — verify against the live DOM).
  const thumbnail = link.childNodes[0].childNodes[0].childNodes[0];
  media.push({
    href: link.href,
    src: thumbnail.src,
    is_video: is_video,
  });
}
// Step 3: Right click the logged output of this line and “Save as…”
console.log(JSON.stringify(media, null, 2));
// Run this with node
const fs = require('fs');
const https = require('https');
const { exec, execFile } = require('child_process');
// The scraped item list; change the path to whatever you named the saved JSON file.
const items = require('./data.json');

// Downloads are written to a `media` folder next to this script.
const dir = `${__dirname}/media`;

// Create the folder on first run only.
const mediaDirExists = fs.existsSync(dir);
if (!mediaDirExists) {
  fs.mkdirSync(dir);
}
/**
 * Fetches a video item's page and hands the video URL found in its
 * `og:video` meta tag to `downloadMedia`.
 *
 * Failures (non-200 status, network error, missing meta tag) are logged and
 * the item is skipped, so one bad item does not abort the whole run.
 *
 * @param {{href: string}} item - Scraped entry; `href` is the page to fetch.
 * @returns {void}
 */
function getVideo(item) {
  https.get(item.href, (res) => {
    const { statusCode } = res;
    if (200 !== statusCode) {
      console.log(`:( ${statusCode} for ${item.href}`);
      // Drain the response so the socket is released.
      res.resume();
      return;
    }

    res.setEncoding('utf8');
    let data = '';
    res.on('data', (chunk) => { data += chunk; });
    res.on('end', () => {
      // `[^"]*` stops at the attribute's closing quote; a greedy `.*` would
      // run on to the last `" />` on the same line of minified HTML.
      const match = data.match(/<meta\sproperty="og:video"\scontent="([^"]*)"\s\/>/i);
      if (!match || !match[1]) {
        console.log(`:( no og:video meta tag found for ${item.href}`);
        return;
      }
      // Attribute values are HTML-escaped, so `&amp;` stands for a literal `&`.
      const url = match[1].replace(/&amp;/g, '&');
      downloadMedia(url);
    });
  }).on('error', (err) => {
    // Without a listener, a request error is thrown as an unhandled 'error' event.
    console.log(`:( ${err.message} for ${item.href}`);
  });
}
/**
 * Returns the trailing segment of a path or URL.
 *
 * Backslashes are first converted to forward slashes, then the longest
 * leading run that ends in `/` is removed. Anything after the final slash,
 * including a query string, is kept.
 *
 * @param {string} path - File path or URL.
 * @returns {string} The text after the last slash, or the input unchanged if it has none.
 */
function basename(path) {
  const forwardSlashed = path.replace(/\\/g, '/');
  const lastSegment = forwardSlashed.replace(/.*\//, '');
  return lastSegment;
}
/**
 * Downloads one media file into `dir` with wget, unless a file with the same
 * basename is already there.
 *
 * wget is started via `execFile` rather than through a shell: the URL comes
 * from scraped data and routinely contains `&` in its query string, which a
 * shell would interpret instead of passing through to wget.
 *
 * @param {string} url - Direct URL of the image or video.
 * @returns {void}
 */
function downloadMedia(url) {
  // Items scraped without a usable URL are skipped instead of crashing the run.
  if (typeof url !== 'string' || url === '') {
    console.log(':( skipping item without a media URL');
    return;
  }

  const target = `${dir}/${basename(url)}`;
  if (fs.existsSync(target)) {
    console.log(`${target} exists`);
    return;
  }

  // I’m lazy: let wget do the actual transfer.
  console.log(`Fetching ${url}`);
  // `cwd` replaces the old `cd ${dir};` prefix; the URL is a single argv entry.
  execFile('wget', [url], { cwd: dir }, (err) => {
    if (err) {
      console.log(`:( wget failed for ${url}: ${err.message}`);
    }
  });
}
// Dispatch every scraped entry: stills are downloaded straight from `src`,
// while videos first need their page fetched to discover the real file URL.
for (const entry of Array.from(items)) {
  if (!entry.is_video) {
    downloadMedia(entry.src);
  } else {
    getVideo(entry);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment