Skip to content

Instantly share code, notes, and snippets.

@tim-evans
Last active June 21, 2019 02:26
Show Gist options
  • Save tim-evans/2e87913565b28bd5b252f6f1826aee1e to your computer and use it in GitHub Desktop.
Save tim-evans/2e87913565b28bd5b252f6f1826aee1e to your computer and use it in GitHub Desktop.
const https = require('https');
const { URL } = require('url');
const AWS = require('aws-sdk');
/**
 * Download the body of a resource over HTTPS and return it as a string.
 *
 * The request is always sent with the `https` module, whatever scheme
 * `location` carries. The HTTP status code is not inspected, so error pages
 * (for example a 404 body) resolve like any other response. Redirects are
 * not followed.
 *
 * @param {string|URL} location - Absolute URL of the resource to download.
 * @returns {Promise<string>} Resolves with the complete response body decoded
 *   as UTF-8; rejects if the request or the response stream emits an error.
 * @throws {TypeError} Synchronously, if `location` is not a valid URL.
 */
function fetch(location) {
  const url = new URL(location);
  const options = {
    // `hostname` excludes the port; `url.host` would embed it in the host name.
    hostname: url.hostname,
    // Keep the query string so parameterised URLs fetch the right document.
    path: url.pathname + url.search,
  };
  if (url.port) {
    options.port = url.port;
  }

  return new Promise((resolve, reject) => {
    const request = https.get(options, (response) => {
      let html = '';
      // Decode in the stream so multi-byte characters that straddle a chunk
      // boundary are not corrupted by per-chunk Buffer-to-string coercion.
      response.setEncoding('utf8');
      response.on('data', (chunk) => {
        html += chunk;
      });
      response.on('end', () => resolve(html));
      response.on('error', reject);
    });
    // Without this handler a DNS/TLS/socket failure is an unhandled 'error'
    // event and the promise would never settle.
    request.on('error', reject);
  });
}
/**
 * Upload a single object through `bucket.upload`, with a public-read ACL and
 * a Content-Type chosen from the key's file extension.
 *
 * Extension mapping: `.js` -> application/javascript, `.json` ->
 * application/json, `.css` -> text/css, `.xml` -> text/xml; any other key
 * is uploaded as text/html.
 *
 * @param {object} bucket - Object exposing `upload(params, callback)` with a
 *   Node-style `(err, result)` callback (here: an `AWS.S3` instance).
 * @param {string} Key - Object key to write.
 * @param {string|Buffer} Body - Object contents.
 * @returns {Promise<*>} Resolves with whatever result the upload callback
 *   reports; rejects with the error reported by the upload callback.
 */
function pushToS3(bucket, Key, Body) {
  return new Promise((resolve, reject) => {
    let ContentType = 'text/html';
    if (Key.endsWith('.js')) {
      ContentType = 'application/javascript';
    } else if (Key.endsWith('.json')) {
      ContentType = 'application/json';
    } else if (Key.endsWith('.css')) {
      ContentType = 'text/css';
    } else if (Key.endsWith('.xml')) {
      ContentType = 'text/xml';
    }

    bucket.upload({
      ACL: 'public-read',
      Key,
      Body,
      ContentType,
    }, (err, res) => {
      if (err) {
        // Return so a failed upload does not fall through to resolve().
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}
/**
 * Download one URL and store its body in the bucket under the URL's path.
 *
 * The object key is the URL pathname without its leading slash. When that
 * key is exactly `home`, the body is downloaded from `location` with the
 * first `/home` occurrence removed, but is still stored under the key `home`.
 *
 * @param {string} location - Absolute URL to mirror.
 * @param {object} bucket - Upload target handed through to `pushToS3`.
 * @returns {Promise<string>} The downloaded body.
 */
async function sync(location, bucket) {
  const parsed = new URL(location);
  // Strip the leading "/" so the path can be used as the object key.
  const Key = parsed.pathname.slice(1);

  let source;
  if (Key === 'home') {
    source = location.replace('/home', '');
  } else {
    source = parsed;
  }

  const body = await fetch(source);
  await pushToS3(bucket, Key, body);
  return body;
}
/**
 * Mirror a site into the bucket: the sitemap, a fixed list of static assets,
 * and then every page listed in the sitemap together with its `.json`
 * counterpart. All transfers run sequentially.
 *
 * @param {string} hostname - Host to mirror; every request uses `https://`.
 * @param {object} bucket - Upload target handed through to `sync`.
 * @returns {Promise<void>} Settles once everything is copied; rejects with
 *   the first error raised by a transfer.
 */
async function run(hostname, bucket) {
  const origin = `https://${hostname}`;
  const sitemap = await sync(`${origin}/sitemap.xml`, bucket);

  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so fall back to [] for a sitemap without <loc> entries.
  const entries = sitemap.match(/<loc>([^<]+)<\/loc>/g) || [];
  // Trim so whitespace inside <loc> does not end up in the middle of the
  // derived ".json" URL.
  const urls = entries.map((loc) => loc.replace(/<loc>([^<]+)<\/loc>/g, '$1').trim());

  const assets = ['/style.css', '/scripts/gallery.js', '/scripts/header.js', '/404'];
  for (const asset of assets) {
    await sync(origin + asset, bucket);
  }

  for (const url of urls) {
    console.log('🧵 Grabbing ' + url);
    await sync(url, bucket);
    await sync(url + '.json', bucket);
  }
  console.log('Done');
}
// Script entry point. AWS credentials come from the command line:
// argv[2] is the access key id and argv[3] is the secret access key.
// NOTE(review): command-line arguments may be visible to other local users
// through the process list; consider environment variables or a shared
// credentials file instead.
AWS.config.update({
  accessKeyId: process.argv[2],
  secretAccessKey: process.argv[3]
});

run('www.queertangocollective.org', new AWS.S3({
  params: { Bucket: 'queertangocollective-archive' }
})).catch((err) => {
  // Report the failure and exit non-zero instead of leaving an unhandled
  // promise rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment