Download a private AWS S3 folder via aws-sdk-js (with a Browserify shim / webpack polyfill for the Node.js stream module)
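For context, the code below assumes roughly the setup sketched here. The sketch is an addition for clarity, not part of the original gist: the region, credentials, bucket name, and the message element are placeholders.

// Assumed setup (sketch): streamSaver comes from https://github.com/jimmywarting/StreamSaver.js
// and ZIP from the zip-stream.js example linked in the comments below.
const AWS = require('aws-sdk');
AWS.util.stream = require('stream'); // point the SDK at the bundler's stream shim (see the note at the end)
const s3 = new AWS.S3({
  region: 'us-east-1', // placeholder region
  // credentials: supply via Cognito/STS or similar for private objects (placeholder)
  params: { Bucket: 'my-private-bucket' } // placeholder; bound here so getObject() below only needs a Key
});
const message = document.querySelector('#message'); // status element for progress/errors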
const file1 = {
  name: "example/file1.txt",
  stream: () => new Blob(["support blobs too"]).stream()
};
// Note: Windows gets confused when file and folder names start with /
const fileMap = new Map([
  ["file1", file1],
  ["file2", new File(["file2 content"], "example/file2.txt")],
  ["zip-subfolder/Sintel.mp4", "s3key/Sintel.mp4"],
  ["zip-subfolder/test.txt", "s3key/test.txt.gz"]
]);
const fileEntries = fileMap.entries();
const fileStream = streamSaver.createWriteStream('download.zip');
// In an ideal world I would just have used a TransformStream,
// where you would get `{ readable, writable } = new TransformStream()`,
// `readable` would be piped to StreamSaver, and the writer would accept
// file-like objects. But that made it dependent on TransformStream and WritableStream,
// so I built ZIP-Stream, similar to a ReadableStream, except that you enqueue
// file-like objects, meaning each entry should have at the very least { name, stream() }.
//
// It supports pull() too, which gets called when it asks for more files.
//
// NOTE: My zip library can't generate zips over 4 GB and has no compression;
// it was built solely for the purpose of saving multiple files in the browser.
// Depends on: https://github.com/jimmywarting/StreamSaver.js/blob/master/examples/zip-stream.js
const readableZipStream = new ZIP({
  start(ctrl) {
    // Insert an empty folder
    ctrl.enqueue({ name: "info/Downloaded from S3", directory: true });
  },
  async pull(ctrl) {
    // Called when ZIP-Stream asks for more files (after the previous file has been "archived")
    const it = fileEntries.next();
    if (!it.done) {
      const [name, file] = it.value;
      if (file.name && typeof file.stream === 'function') {
        // Enqueue file-like objects (anything with { name, stream() }) directly
        ctrl.enqueue(file);
        return;
      }
      // Otherwise the map value is an S3 key: create a request without sending it
      // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property
      const key = file;
      const request = s3.getObject({ Key: key });
      // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#sign-event
      //request.onAsync('sign', signCloudfront)
      // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#httpDownloadProgress-event
      request.on('httpDownloadProgress', function ({ loaded, total }) {
        // Display progress
        message.textContent = `${name} - ${formatBytes(loaded)} of ${formatBytes(total)}`;
      });
      // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#error-event
      request.on('error', (error) => {
        // Display request error
        message.textContent = `Error: ${String(error)}`;
      });
      const responsePromise = new Promise((resolve, reject) => {
        // See: https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/using-a-response-event-handler.html
        // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#httpHeaders-event
        request.on('httpHeaders', (statusCode, headers, response, statusMessage) => {
          if (statusCode < 200 || statusCode > 299) {
            reject(new Error(`${statusCode}: ${statusMessage}`));
          } else {
            resolve({ statusCode, headers, response, statusMessage });
          }
        })
        .on('httpError', (error, response) => {
          reject(error);
        });
      });
      // Sends the request and converts the request object into a readable stream
      // that can be read from or piped into a writable stream.
      // See: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#createReadStream-property
      // Make sure the Node.js shim/polyfill for the browser is enabled in webpack/Browserify
      // and that `AWS.util.stream = require('stream')` is set (see the note at the end).
      const nodeStream = request.createReadStream();
      // Ensure the file is accessible before enqueueing it
      const {
        statusCode,
        statusMessage,
        headers
      } = await responsePromise;
      // See: https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html#API_GetObject_ResponseSyntax
      //const contentType = headers["content-type"]; // image/jpeg
      // Enqueue a file-like object wrapping the S3 response stream
      ctrl.enqueue({
        name,
        stream: () => convertReadableStream(nodeStream)
      });
    } else {
      // Done adding all files
      ctrl.close();
    }
  }
});
const promise = pipeTo(readableZipStream, fileStream);
promise.then(() => {
  console.log("done writing");
  message.textContent = "Download complete";
}).catch((err) => {
  console.error(err);
  message.textContent = `Error: ${String(err)}`;
});
function convertReadableStream(nodeStream) {
  return new ReadableStream({
    start(controller) {
      // This is called immediately when the object is constructed
      nodeStream
        .on('error', (err) => controller.error(err))
        .on('end', () => controller.close())
        //.on('data', (chunk) => controller.enqueue(chunk))
        .on('readable', () => {
          // read() returns null when nothing is buffered yet,
          // so only enqueue real chunks
          const chunk = nodeStream.read();
          if (chunk !== null) controller.enqueue(chunk);
          // Wake up a pull() below that is waiting for more data
          if (this.resolvePull) {
            this.resolvePull();
            this.resolvePull = null;
          }
        })
    },
    pull(controller) {
      // This is called repeatedly while the stream's
      // internal queue of chunks is not full
      const chunk = nodeStream.read();
      if (chunk === null) {
        // Nothing buffered right now: return a promise that the
        // 'readable' handler above resolves once data arrives again
        return new Promise((resolve) => { this.resolvePull = resolve });
      } else {
        controller.enqueue(chunk);
      }
    },
    cancel(reason) {
      // This is called if the reader cancels
      nodeStream.destroy(new Error(reason));
    }
  });
}
function pipeTo(readable, writable) {
  try {
    if (readable.pipeTo) return readable.pipeTo(writable);
  } catch (_) {}
  var reader = readable.getReader();
  var writer = writable.getWriter();
  var pump = function() {
    return reader.read()
      .then(function(res) {
        return res.done ? writer.close() : writer.write(res.value).then(pump);
      });
  };
  return pump();
}
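// Why the fallback: some browsers at the time (Firefox, for one) shipped
// ReadableStream without pipeTo or WritableStream support, so pipeTo()
// above falls back to a manual reader/writer pump when pipeTo is missing.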
function formatBytes(bytes, precision) {
  if (isNaN(parseFloat(bytes)) || !isFinite(bytes)) return '-';
  if (bytes === 0) return '0 bytes'; // guard: Math.log(0) would yield -Infinity below
  if (typeof precision === 'undefined') precision = 1;
  var units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'],
      number = Math.floor(Math.log(bytes) / Math.log(1024));
  return (bytes / Math.pow(1024, number)).toFixed(precision) + ' ' + units[number];
}
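A note on the stream shim mentioned in the comments above: request.createReadStream() hands back a Node.js readable stream, which only exists in the browser if the bundler polyfills the stream core module. Browserify and webpack 4 shim Node core modules automatically; webpack 5 requires opting in. Here is a sketch of the webpack 5 opt-in using the stream-browserify package; this config is an assumption, not part of the gist:

// webpack.config.js (webpack 5 sketch; not needed for webpack 4 / Browserify)
module.exports = {
  resolve: {
    fallback: {
      stream: require.resolve('stream-browserify') // assumed polyfill package
    }
  }
};

With the shim in place, the AWS.util.stream = require('stream') assignment from the setup sketch wires the SDK to it.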
Another way to convert Node.js streams, using the upcoming ReadableStream.from(iterable) feature (polyfilled below):
ReadableStream.from = function (iterable) {
if (iterable && iterable[Symbol.iterator])
iterable = iterable[Symbol.iterator]()
else if (iterable && iterable[Symbol.asyncIterator])
iterable = iterable[Symbol.asyncIterator]()
else
throw new TypeError('not an iterator')
return new ReadableStream({
async pull (ctrl) {
const it = await iterable.next()
it.done ? ctrl.close() : ctrl.enqueue(it.value)
}
})
}
function convertReadableStream (nodeStream) {
return ReadableStream.from(nodeStream) // much simpler
}
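This works because Node.js readable streams are async iterable (Node 10+, and likewise the readable-stream v3 shim that bundlers use), so they pass the Symbol.asyncIterator check above. The same helper accepts plain async generators too; a small sketch, added for illustration:

// Sketch: ReadableStream.from also accepts an async generator
async function* chunks () {
  yield new TextEncoder().encode('hello ')
  yield new TextEncoder().encode('world')
}
const rs = ReadableStream.from(chunks()) // a ReadableStream of two Uint8Array chunks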