Skip to content

Instantly share code, notes, and snippets.

@aseemk
Last active August 21, 2017 16:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aseemk/ec426819df805d5c173bce3ee1c624a3 to your computer and use it in GitHub Desktop.
Save aseemk/ec426819df805d5c173bce3ee1c624a3 to your computer and use it in GitHub Desktop.
FiftyThree Mix (AKA Paper Public Stream) archiver/scraper script
{
  "name": "mix-archiver",
  "version": "1.0.0",
  "description": "Scrape and archive a FiftyThree Mix (Paper Public Stream) profile.",
  "bin": "scrape.js",
  "author": "Aseem Kishore <aseem.kishore@gmail.com>",
  "license": "MIT",
  "dependencies": {
    "superagent": "^3.6.0"
  },
  "devDependencies": {}
}
#!/usr/bin/env node
//
// Usage: simply run `node scrape.js` in the directory where you want JSON and images downloaded.
// They'll be downloaded to `json` and `images` subdirectories, respectively.
// Shorthand for console.log; every progress message in this script goes through it.
const echo = console.log;
// Node built-ins: file system access and path joining.
const FS = require('fs');
const Path = require('path');
// Third-party HTTP client used for both the JSON API calls and the image downloads.
const superagent = require('superagent');
// Base URL that the API paths (including each page's `nextURL`) are appended to.
const API_ROOT = 'https://paper-api.fiftythree.com';
const USER_ID = 'aseemk'; // <-- FILL IN YOUR MIX USER ID HERE (can find by going to https://paper.fiftythree.com/me)
// These file paths are relative to the current working directory:
// JSON_DIR receives the combined and per-creation JSON files,
// IMAGES_DIR receives the downloaded creation images.
const JSON_DIR = 'json';
const IMAGES_DIR = 'images';
/**
 * Ensure that a directory exists at `path`, creating it if necessary.
 *
 * Only the final path segment is created; parent directories must already
 * exist. An already-existing directory is treated as success.
 *
 * @param {string} path - Directory path (relative paths resolve against the cwd).
 * @returns {Promise<void>} Resolves once the directory is known to exist.
 * @throws {Error} Any `mkdir` failure other than `EEXIST`, or the original
 *   `EEXIST` error when the existing entry is not a directory.
 */
const ensureDir = async (path) => {
  try {
    FS.mkdirSync(path);
  } catch (err) {
    if (err.code !== 'EEXIST') {
      throw err;
    }
    // EEXIST is also raised when a regular file occupies the path. Swallowing
    // that would let setup "succeed" and make every later write fail with a
    // confusing error, so only accept a real directory.
    if (!FS.statSync(path).isDirectory()) {
      throw err;
    }
  }
};
/**
 * Prepare the output directories (JSON first, then images) under the
 * current working directory.
 *
 * @returns {Promise<void>} Resolves once both directories have been ensured.
 */
const setup = async () => {
  echo(`Creating directories...`);
  for (const dir of [JSON_DIR, IMAGES_DIR]) {
    await ensureDir(dir);
  }
};
/**
 * Serialize `data` as 4-space-indented JSON and write it into JSON_DIR.
 *
 * @param {string} filename - File name (not a full path) inside JSON_DIR.
 * @param {*} data - Any JSON-serializable value.
 * @param {boolean} [verbose] - When truthy, log the destination before writing.
 * @returns {Promise<void>} Resolves after the synchronous write has completed.
 */
const saveJSON = async (filename, data, verbose) => {
  const target = Path.join(JSON_DIR, filename);
  const serialized = JSON.stringify(data, null, 4);
  if (verbose) {
    echo(`Saving ${target}...`);
  }
  FS.writeFileSync(target, serialized, 'utf8');
};
/**
 * Download `url` and stream the response body into IMAGES_DIR/`filename`.
 *
 * @param {string} filename - File name (not a full path) inside IMAGES_DIR.
 * @param {string} url - Absolute URL of the image to fetch.
 * @returns {Promise<void>} Resolves once the file has been fully written.
 * @throws {Error} Rejects with the request error or the file-write error,
 *   whichever occurs first.
 */
const downloadImage = async (filename, url) => {
  const file = Path.join(IMAGES_DIR, filename);
  // `pipe()` returns the destination stream, which is not a thenable, so
  // awaiting it directly would resolve immediately. Wait for the stream's
  // own completion/error events instead.
  await new Promise((resolve, reject) => {
    const stream = FS.createWriteStream(file);
    const request = superagent.get(url);
    const fail = (err) => {
      // Release the file descriptor; a partially written file may remain.
      stream.destroy();
      reject(err);
    };
    stream.on('finish', resolve);
    stream.on('error', fail);
    request.on('error', fail);
    request.pipe(stream);
  });
};
/**
 * Map an API image type to the file extension used on disk.
 *
 * @param {string} imageType - Image type as reported by the API item.
 * @returns {string} '.png' for 'png', '.jpg' for 'jpeg', otherwise ''.
 */
const getExtension = (imageType) => {
  if (imageType === 'png') {
    return '.png';
  }
  if (imageType === 'jpeg') {
    return '.jpg';
  }
  // Unknown types get no extension rather than a guessed one.
  return '';
};
/**
 * Fetch every creation of USER_ID from the API (following `nextURL`
 * pagination), then archive them: one combined JSON list, one JSON file per
 * creation, and one image per creation that has `imageURLs`.
 *
 * @returns {Promise<void>} Resolves when all save/download promises settle
 *   successfully; rejects on the first failure.
 */
const scrape = async () => {
  const creations = [];
  let nextPath = `/users/${USER_ID}/creations?count=100`;

  // Each response carries the path of the following page; stop when absent.
  for (let page = 1; nextPath; page += 1) {
    echo(`Fetching JSON page ${page}...`);
    const response = await superagent.get(API_ROOT + nextPath);
    const {body} = response;
    creations.push(...body.items);
    nextPath = body.nextURL;
  }

  const combined = {
    type: 'list',
    items: creations,
  };
  await saveJSON(`${USER_ID}-creations.json`, combined, true);

  echo(`Saving individual creation JSONs...`);
  const jsonWrites = creations.map(
    async (creation) => saveJSON(`${creation.id}.json`, creation)
  );
  await Promise.all(jsonWrites);

  echo(`Downloading individual creation images...`);
  const imageDownloads = creations.map(async (creation) => {
    if (!creation.imageURLs) {
      return undefined;
    }

    const extension = getExtension(creation.imageType);
    // The URL map is keyed by "<width>x<height>/<type>".
    const sizeKey = `${creation.imageWidth}x${creation.imageHeight}/${creation.imageType}`;
    const imageURL = creation.imageURLs[sizeKey];

    return downloadImage(`${creation.id}${extension}`, imageURL);
  });
  await Promise.all(imageDownloads);
};
/**
 * Script entry point: create the output directories, then run the scrape.
 *
 * @returns {Promise<void>} Rejects with whatever error setup or scrape raised.
 */
const main = async () => {
  await setup();
  await scrape();
};

main()
  .then(() => echo('Done!'))
  .catch((err) => {
    echo(err);
    // Signal failure to the calling shell; without this the process would
    // exit with status 0 even though the archive is incomplete.
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment