
@magegu
Last active July 11, 2023 20:12
Multipart upload for AWS S3 with Node.js, based on the async lib, including retries for part uploads
/*
  by Martin Güther @magegu

  just call it:
  uploadFile(absoluteFilePath, callback);
*/

var path = require('path');
var async = require('async');
var fs = require('fs');

var AWS = require('aws-sdk');
AWS.config.loadFromPath('./aws.json');

var s3 = new AWS.S3();
var bucketName = "YOUR BUCKET NAME";

function uploadMultipart(absoluteFilePath, fileName, uploadCb) {
  s3.createMultipartUpload({ Bucket: bucketName, Key: fileName }, (mpErr, multipart) => {
    if (!mpErr) {
      //console.log("multipart created", multipart.UploadId);
      fs.readFile(absoluteFilePath, (err, fileData) => {
        if (err) {
          return uploadCb(err);
        }

        // upload in 5 MB chunks, the minimum part size S3 allows
        var partSize = 1024 * 1024 * 5;
        var parts = Math.ceil(fileData.length / partSize);

        async.timesSeries(parts, (partNum, next) => {
          var rangeStart = partNum * partSize;
          var end = Math.min(rangeStart + partSize, fileData.length);

          console.log("uploading ", fileName, " % ", (partNum / parts).toFixed(2));

          // S3 part numbers are 1-based
          partNum++;

          // async.retry defaults to 5 attempts per part
          async.retry((retryCb) => {
            s3.uploadPart({
              Body: fileData.slice(rangeStart, end),
              Bucket: bucketName,
              Key: fileName,
              PartNumber: partNum,
              UploadId: multipart.UploadId
            }, (err, mData) => {
              retryCb(err, mData);
            });
          }, (err, data) => {
            //console.log(data);
            if (err) {
              return next(err);
            }
            next(null, { ETag: data.ETag, PartNumber: partNum });
          });

        }, (err, dataPacks) => {
          if (err) {
            // note: a failed part leaves the multipart upload open;
            // s3.abortMultipartUpload could be used here to clean it up
            return uploadCb(err);
          }
          s3.completeMultipartUpload({
            Bucket: bucketName,
            Key: fileName,
            MultipartUpload: {
              Parts: dataPacks
            },
            UploadId: multipart.UploadId
          }, uploadCb);
        });
      });
    } else {
      uploadCb(mpErr);
    }
  });
}

function uploadFile(absoluteFilePath, uploadCb) {
  var fileName = path.basename(absoluteFilePath);
  var stats = fs.statSync(absoluteFilePath);
  var fileSizeInBytes = stats["size"];

  // files smaller than 5 MB go through a single putObject call
  if (fileSizeInBytes < (1024 * 1024 * 5)) {
    async.retry((retryCb) => {
      fs.readFile(absoluteFilePath, (err, fileData) => {
        if (err) {
          return retryCb(err);
        }
        s3.putObject({
          Bucket: bucketName,
          Key: fileName,
          Body: fileData
        }, retryCb);
      });
    }, uploadCb);
  } else {
    uploadMultipart(absoluteFilePath, fileName, uploadCb);
  }
}
@kuldeepchopradotnet

kuldeepchopradotnet commented Jul 17, 2020

Can we use a read stream instead of readFile?
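
A rough sketch of one way to do that (not from the gist): in aws-sdk v2, s3.upload() accepts a stream as Body and handles the multipart splitting, concurrency, and retries itself. Assuming the same s3 client and bucketName as above:

var fs = require('fs');
var path = require('path');

// streams the file from disk instead of buffering it all in memory;
// s3.upload() performs a multipart upload under the hood for large bodies
function uploadFileStream(absoluteFilePath, uploadCb) {
  s3.upload({
    Bucket: bucketName,
    Key: path.basename(absoluteFilePath),
    Body: fs.createReadStream(absoluteFilePath)
  }, {
    partSize: 1024 * 1024 * 5, // 5 MB parts
    queueSize: 4               // parts uploaded in parallel
  }, uploadCb);
}

uploadFileStream is a hypothetical name, not something the gist exports.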

@sboyina

sboyina commented Aug 20, 2020

Thanks for the code.

@mostafa7904

RangeError [ERR_FS_FILE_TOO_LARGE]: File size (10485760000) is greater than possible Buffer: 2147483647 bytes

@shahrukhdev

I think that's an fs error and has nothing to do with S3.

Check this answer on Stack Overflow, it might fix your issue:
https://stackoverflow.com/a/63553430/11084093

@thelebdev

Thanks a lot @magegu, that was really helpful.
But you need to update the code to change (1024*1024*5) = 5 MB to (1024*1024*1024*5) = 5 GB.

Why is that?

@YouthInnoLab

I am sorry, I am pretty new to Node.js. Can anyone post a short demo of how to use this code to multipart-upload a huge file (like 20 GB) to an S3 bucket (assume there are no permission issues)? Thank you.
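
A minimal usage sketch (not part of the original gist), assuming aws.json holds valid credentials, bucketName is set to your bucket, and the path below is a hypothetical local file:

// uploadFile picks a single putObject or a multipart upload based on file size
uploadFile('/data/backup-20gb.tar', function (err, data) {
  if (err) {
    console.error("upload failed:", err);
  } else {
    console.log("upload finished:", data);
  }
});

Note that fs.readFile on a file that large will hit the Buffer limit from the ERR_FS_FILE_TOO_LARGE comment above, so a streaming approach (see the sketch earlier in the thread) is the safer route for 20 GB.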

@thelebdev

thelebdev commented Jul 13, 2021 via email

@soubhikchatterjee

Just one question. Does this occupy the nodejs server's disk space during the upload process?

@KeynesYouDigIt

KeynesYouDigIt commented Nov 22, 2021

Extremely helpful, thanks. Would it make sense as an NPM package?

@soubhikchatterjee I think it would, but you could swap in memfs for fs to avoid that, I think.
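
A rough sketch of that swap (not part of the gist), assuming the data already lives in memory and never needs to touch the real disk; memfs exposes an fs-compatible in-memory filesystem:

// hypothetical: bufferAlreadyInMemory is whatever produced the file contents
var memfs = require('memfs');
memfs.vol.writeFileSync('/huge-file.bin', bufferAlreadyInMemory);

// pointing the gist's `var fs = require('fs')` at memfs.fs instead would make
// fs.statSync / fs.readFile in uploadFile read from memory rather than disk

Whether that actually helps depends on where the file comes from in the first place.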

@codywesley

This works fine on my end. Thanks a lot!
