

@joshbedo
Created August 13, 2015 11:57
S3 multipart upload with NodeJS
var express = require('express');
var app = express();
var AWS = require('aws-sdk');
var bodyParser = require('body-parser');
var fs = require('fs');
var zlib = require('zlib'); // gzip compression
var multiparty = require('connect-multiparty'),
    multipartyMiddleware = multiparty();

var AWS_ACCESS_KEY = '**';
var AWS_SECRET_KEY = '**';

AWS.config.update({ accessKeyId: AWS_ACCESS_KEY, secretAccessKey: AWS_SECRET_KEY });
AWS.config.region = 'us-west-2';

app.use("/public", express.static(__dirname + '/public'));
app.use("/node_modules", express.static(__dirname + '/node_modules'));

// parse application/x-www-form-urlencoded
app.use(bodyParser.urlencoded({ extended: false }));

// parse application/json
app.use(bodyParser.json());

app.post('/s', multipartyMiddleware, function (req, res) {
  var s3 = new AWS.S3();
  var file = req.files.file;               // file written to a temp path by connect-multiparty
  var buffer = fs.readFileSync(file.path); // read the whole upload into memory
  var startTime = new Date();
  var partNum = 0;
  var partSize = 1024 * 1024 * 5;          // 5MB chunks; only the last part may be smaller
  var numPartsLeft = Math.ceil(buffer.length / partSize);
  var maxUploadTries = 3;
  var multipartParams = {
    Bucket: 'videos.imstillreallybored',
    Key: file.name,
    ContentType: file.type
  };
  var multipartMap = {
    Parts: []
  };

  console.log('Creating multipart upload for:', file.name);
  s3.createMultipartUpload(multipartParams, function (mpErr, multipart) {
    if (mpErr) return console.error('Error!', mpErr);
    console.log('Got upload ID', multipart.UploadId);

    // Upload each 5MB slice of the buffer as its own part
    for (var start = 0; start < buffer.length; start += partSize) {
      partNum++;
      var end = Math.min(start + partSize, buffer.length);
      var partParams = {
        Body: buffer.slice(start, end),
        Bucket: multipartParams.Bucket,
        Key: multipartParams.Key,
        PartNumber: String(partNum),
        UploadId: multipart.UploadId
      };

      console.log('Uploading part: #', partParams.PartNumber, ', Start:', start);
      uploadPart(s3, multipart, partParams);
    }
  });

  function completeMultipartUpload(s3, doneParams) {
    s3.completeMultipartUpload(doneParams, function (err, data) {
      if (err) return console.error('An error occurred while completing multipart upload', err);
      var delta = (new Date() - startTime) / 1000;
      console.log('Completed upload in', delta, 'seconds');
      console.log('Final upload data:', data);
    });
  }

  function uploadPart(s3, multipart, partParams, tryNum) {
    tryNum = tryNum || 1;
    s3.uploadPart(partParams, function (multiErr, mData) {
      console.log('started');
      if (multiErr) {
        console.log('Upload part error:', multiErr);
        if (tryNum < maxUploadTries) {
          console.log('Retrying upload of part: #', partParams.PartNumber);
          uploadPart(s3, multipart, partParams, tryNum + 1);
        } else {
          console.log('Failed uploading part: #', partParams.PartNumber);
        }
        return; // mData is null on error, so stop here instead of reading mData.ETag
      }

      // Parts must be recorded in order for CompleteMultipartUpload
      multipartMap.Parts[this.request.params.PartNumber - 1] = {
        ETag: mData.ETag,
        PartNumber: Number(this.request.params.PartNumber)
      };
      console.log('Completed part', this.request.params.PartNumber);
      console.log('mData', mData);

      if (--numPartsLeft > 0) return; // complete only when all parts are uploaded

      var doneParams = {
        Bucket: multipartParams.Bucket,
        Key: multipartParams.Key,
        MultipartUpload: multipartMap,
        UploadId: multipart.UploadId
      };

      console.log('Completing upload...');
      completeMultipartUpload(s3, doneParams);
    }).on('httpUploadProgress', function (progress) {
      console.log(Math.round(progress.loaded / progress.total * 100) + '% done');
    });
  }

  // Earlier experiments, kept for reference:
  // console.log(stream);
  // s3.upload(params).
  //   on('httpUploadProgress', function(progress) { console.log(progress); }).
  //   send(function(err, data) { console.log(err, data) });
  // var s3 = new AWS.S3();
  // var params = {Bucket: 'videos.imstillreallybored', Key: req.body.name, ContentType: req.body.type};
  // s3.getSignedUrl('putObject', params, function(err, url) {
  //   if (err) console.log(err);
  //   res.json({url: url});
  // });
  // console.log(Math.round(progress.loaded/progress.total*100)+ '% done')
})

var server = app.listen(3000, function () {
  var host = server.address().address;
  var port = server.address().port;

  console.log('Example app listening at http://%s:%s', host, port);
})
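
To exercise the /s route, the request just needs a multipart form field named file (that is what connect-multiparty exposes as req.files.file). A minimal browser-side sketch, assuming the page is served from /public and contains an <input type="file" id="file"> element:

    var input = document.getElementById('file');

    input.addEventListener('change', function () {
      var formData = new FormData();
      formData.append('file', input.files[0]); // field name must match req.files.file

      fetch('/s', { method: 'POST', body: formData })
        .then(function (res) { console.log('Upload request finished with status', res.status); })
        .catch(function (err) { console.error('Upload failed:', err); });
    });

Note that the route above never sends a response, so this request will hang until it times out even when the upload succeeds; calling res.json(...) from completeMultipartUpload would close the loop.
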
@ranveerkumar

Hi,
I am using something similar to build an Adobe InDesign extension. The Adobe application provides io.js 1.2.0 underneath, so I'm reading a file from a local path on the system and uploading it in chunks. I'm using the following code to read the file:

buffer = fs.readFileSync(localPath);

The rest of the code is similar to what you have here. The file types are INDD, PDF, IDML, PSD, AI, SVG, TTF, OTF, PNG, JPG, etc.

The files upload to S3 successfully. However, when I download and open them, they won't open; a PDF opens as a single blank page.

Any idea what could possibly go wrong?

@mkhizeryounas

Hi,

I have a question: what if the last part is less than 5 MB, which isn't supported by S3 multipart? What happens then?

@sheeshmohsin

@mkhizeryounas The last part can be less than 5 MB. See here
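
For what it's worth, the slicing loop in the gist handles this automatically; only the non-final parts have to be at least 5 MB. A quick sketch of the arithmetic, using a made-up 12 MB file size:

    var partSize = 1024 * 1024 * 5;   // 5 MB parts
    var fileSize = 1024 * 1024 * 12;  // hypothetical 12 MB upload

    for (var start = 0; start < fileSize; start += partSize) {
      var end = Math.min(start + partSize, fileSize);
      console.log('part size in bytes:', end - start); // 5 MB, 5 MB, then a 2 MB final part
    }
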

@mkhizeryounas

Yes, I found it in the docs. Thanks :)

@abhijeet1995

When I upload a file larger than 200 MB, I get an error:

Upload part error: { Error: write ECONNRESET
at WriteWrap.afterWrite [as oncomplete] (net.js:788:14)
message: 'write ECONNRESET',
errno: 'ECONNRESET',
code: 'NetworkingError',
syscall: 'write',
region: 'us-east-2',
hostname: 'botiqfiles.s3.us-east-2.amazonaws.com',
retryable: true,
time: 2019-11-28T09:13:01.540Z,
statusCode: 400 }
Retrying upload of part: # 112
C:\Users\abhijeet\Documents\nodejsmysql\node_modules\aws-sdk\lib\request.js:31
throw err;
^

TypeError: Cannot read property 'ETag' of null
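
That TypeError is consistent with the error branch of uploadPart falling through: when a part fails (here with ECONNRESET), mData is null, so reading mData.ETag throws unless the callback returns right after scheduling the retry. A minimal guard, mirroring the retry logic in the gist:

    if (multiErr) {
      console.log('Upload part error:', multiErr);
      if (tryNum < maxUploadTries) {
        uploadPart(s3, multipart, partParams, tryNum + 1);
      } else {
        console.log('Failed uploading part: #', partParams.PartNumber);
      }
      return; // mData is null here, so never touch mData.ETag
    }
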

@balajikr1994

This code only works if I make a single API call. It doesn't work if I make two or more API calls; the loop fails.

@egekhter

egekhter commented Aug 14, 2020

Thank you for the script. I adapted it to copy files over 5 GB from one bucket to another in a memory-efficient manner.

function copyS3MP(from_bucket, from_key, to_bucket, to_key) {
    const AWS = require('aws-sdk');
    AWS.config.update({region: 'us-west-2'});

    let s3 = new AWS.S3();

    let head, uploadId, numParts, fileSize;

    let startTime = new Date();
    let partNum = 0;
    let partSize = 1024 * 1024 * 10; // 10mb chunks except last part
    let maxUploadTries = 3;

    let multiPartParams = {
        Bucket: to_bucket,
        Key: to_key,
        ContentType: getContentType(to_key)
    };

    let multipartMap = {
        Parts: []
    };

    function getHead() {
        return new Promise(async (resolve, reject) => {
            try {
                const h = await s3.headObject({
                    Bucket: from_bucket,
                    Key: from_key
                }).promise();

                resolve(h);
            } catch (e) {
                reject(e);
            }
        });
    }

    function createMultipartUpload() {
        return new Promise(async (resolve, reject) => {
            try {
                s3.createMultipartUpload(multiPartParams, function(mpErr, multipart) {
                    if (mpErr) {
                        console.error(mpErr);
                        return reject(mpErr);
                    }

                    console.log('Got upload ID', multipart.UploadId);

                    return resolve(multipart.UploadId);
                });
            } catch (e) {
                reject(e);
            }
        });
    }

    function copyPart(start, partNum) {
        let tryNum = 1;

        function copyLogic(copyParams) {
            return new Promise((resolve, reject) => {
                s3.uploadPartCopy(copyParams, function(multiErr, mData) {
                    if (multiErr) {
                        console.log('Upload part error:', multiErr);
                        return reject(multiErr);
                    } else {
                        multipartMap.Parts[this.request.params.PartNumber - 1] = {
                            ETag: mData.ETag,
                            PartNumber: Number(this.request.params.PartNumber)
                        };

                        console.log('Completed part', this.request.params.PartNumber);
                        console.log('mData', mData);

                        return resolve();
                    }
                }).on('httpUploadProgress', function(progress) {  console.log(Math.round(progress.loaded/progress.total*100)+ '% done') });
            });
        }

        return new Promise(async (resolve, reject) => {
            let end = Math.min(start + partSize, fileSize);

            try {
                let partParams = {
                    Bucket: to_bucket,
                    Key: to_key,
                    PartNumber: String(partNum),
                    UploadId: uploadId,
                    CopySource: `${from_bucket}/${from_key}`,
                    CopySourceRange: `bytes=${start}-${end - 1}`
                };

                while (tryNum <= maxUploadTries) {
                    try {
                        await copyLogic(partParams);
                        return resolve();
                    } catch (e) {
                        tryNum++;
                        if (tryNum <= maxUploadTries) {
                            console.log('Retrying copy of part: #', partParams.PartNumber);
                            await module.exports.sleep(1);
                        } else {
                            console.log('Failed uploading part: #', partParams.PartNumber);
                            return reject(e);
                        }
                    }
                }

                resolve();
            } catch (e) {
                return reject(e);
            }
        });
    }

    function completeMultipartUpload() {
        return new Promise((resolve, reject) => {
            let doneParams = {
                Bucket: to_bucket,
                Key: to_key,
                MultipartUpload: multipartMap,
                UploadId: uploadId
            };

            s3.completeMultipartUpload(doneParams, function(err, data) {
                if (err) {
                    return reject(err);
                }

                var delta = (new Date() - startTime) / 1000;
                console.log('Completed upload in', delta, 'seconds');
                console.log('Final upload data:', data);

                return resolve();
            });
        });
    }

    return new Promise(async (resolve, reject) => {
        try {
            head = await getHead();
            fileSize = head.ContentLength;
        } catch (e) {
            return reject(e);
        }

        numParts = Math.ceil(fileSize / partSize);

        console.log('Creating multipart upload for:', to_key);

        try {
            uploadId = await createMultipartUpload();
        } catch (e) {
            return reject(e);
        }

        for (let start = 0; start < fileSize; start += partSize) {
            partNum++;
            console.log("Part Num: " + partNum);

            try {
                await copyPart(start, partNum);
            } catch (e) {
                console.error(e);
                return reject(e);
            }
        }

        try {
            await completeMultipartUpload();
        } catch (e) {
            return reject(e);
        }

        resolve();
    });
}
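
A usage sketch for the function above; the bucket and key names are made up, and it assumes the getContentType and module.exports.sleep helpers exist in the surrounding module. copyS3MP returns a promise, so it can be awaited or chained:

    // Hypothetical buckets/keys, just to show the call shape.
    copyS3MP('my-source-bucket', 'videos/big-file.mov', 'my-dest-bucket', 'videos/big-file.mov')
        .then(() => console.log('Copy finished'))
        .catch((err) => console.error('Copy failed:', err));
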

@darshak-gosar

Hi,
While using the S3 multipart upload functionality, I am facing an issue where the ETag is null for one of the parts and the script execution fails.
Error:

Upload part error: { 
RequestTimeTooSkewed: The difference between the request time and the current time is too large,
message:
   'The difference between the request time and the current time is too large.',
  code: 'RequestTimeTooSkewed',
  region: null,
  time: 2020-08-18T16:08:46.209Z,
  requestId: 'B2F9C493F61440E3',
  extendedRequestId:
   'mIP9HfZoqxZVBWaDtj900m/KwIA2uLu2oyWrfp7GnJhj7hg8mygRJVEdOS3TYMp/QSuj4mLhjPc=',
  cfId: undefined,
  statusCode: 403,
  retryable: false,
  retryDelay: 28.44339801766571 
}

Has anyone come across such an error?

@tigran10

Hey @egekhter, I was just looking at your code and wondering where the getContentType(to_key) method comes from?
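
getContentType isn't defined in that snippet; presumably it maps the destination key's file extension to a MIME type. A hypothetical stand-in (a real implementation might use the mime-types npm package instead):

    // Hypothetical helper: map a key's file extension to a MIME type.
    function getContentType(key) {
        const types = {
            '.mp4': 'video/mp4',
            '.mov': 'video/quicktime',
            '.pdf': 'application/pdf',
            '.png': 'image/png',
            '.jpg': 'image/jpeg'
        };
        const dot = key.lastIndexOf('.');
        const ext = dot === -1 ? '' : key.slice(dot).toLowerCase();
        return types[ext] || 'application/octet-stream';
    }
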

@soubhikchatterjee

Just one question: does this occupy the Node.js server's disk space during the upload process?

@danicunhac

I rewrote part of this script in a less verbose way:
https://gist.github.com/danicunhac/b458cf1fbb40482b74254f18c8492ad3
