/*
*
* A simple S3 library, written because the existing implementations didn't meet our needs and were outdated.
*
* What this library is good for:
*
* ** Can send files of any size (although S3 currently caps a single PUT at 5 gigs)
* ** Streams files from disk to socket -- minimal memory footprint and very efficient
* ** Uses sockets to support 100 Continue (especially useful for large files)
* - the body is only sent after Amazon approves the signed headers
* ** Supports Amazon's REDUCED_REDUNDANCY storage (cheaper but less reliable)
* ** Supports specifying an acl
* ** Creates a hash of the file to validate its contents
*
*
* Events emitted from this S3 library:
*
* PUT :
* progress(percentSent){}, complete(){}, error(exception){}
* DELETE :
* complete(){}, error(exception){}
* GET :
* progress(percentRead){}, complete(){}, error(exception){}
*
* storage types:
* (http://aws.amazon.com/about-aws/whats-new/2010/05/19/announcing-amazon-s3-reduced-redundancy-storage/)
*
* STANDARD
* REDUCED_REDUNDANCY (cheaper, but less reliable)
*
* acl types:
* (http://docs.amazonwebservices.com/AmazonS3/latest/dev/index.html?RESTAccessPolicy.html)
* private | public-read | public-read-write | authenticated-read | bucket-owner-read | bucket-owner-full-control
*
*/
var http = require('http'),
crypto = require('crypto'),
fs = require('fs'),
EventEmitter = require('events').EventEmitter,
net = require('net'),
dns = require('dns');
var mime = require('node-mime/mime');
// note: this is a process-wide handler, not scoped to this library
process.on('uncaughtException', function (err) {
console.log('warning! uncaught exception: ' + err);
});
/*
* Amazon's REST documentation:
*
* http://docs.amazonwebservices.com/AmazonS3/latest/index.html?RESTObjectPUT.html
* http://docs.amazonwebservices.com/AmazonS3/index.html?RESTAuthentication.html
* http://docs.amazonwebservices.com/AmazonS3/latest/index.html?RESTCommonRequestHeaders.html
*/
/*
* Amazon S3 signing algorithm -
*
* Authorization = "AWS" + " " + AWSAccessKeyId + ":" + Signature;
*
* Signature = Base64( HMAC-SHA1( UTF-8-Encoding-Of( YourSecretAccessKeyID, StringToSign ) ) );
*
* StringToSign = HTTP-Verb + "\n" +
* Content-MD5 + "\n" +
* Content-Type + "\n" +
* Date + "\n" +
* CanonicalizedAmzHeaders +
* CanonicalizedResource;
*
* CanonicalizedResource = [ "/" + Bucket ] +
* <HTTP-Request-URI, from the protocol name up to the query string> +
* [ sub-resource, if present. For example "?acl", "?location", "?logging", or "?torrent"];
*
* CanonicalizedAmzHeaders = <described below>
*
*/
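/*
* For example, a PUT of a JPEG into bucket "mybucket" (all values below are hypothetical)
* signs the string:
*
*   "PUT\n" +
*   "\n" +                               (no Content-MD5)
*   "image/jpeg\n" +
*   "Tue, 27 Mar 2007 21:15:45 GMT\n" +
*   "x-amz-acl:private\n" +
*   "x-amz-storage-class:STANDARD\n" +
*   "/mybucket/photo.jpg"
*/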
var S3 = function(awsSecretKey, awsAccessKey, options){
this._awsSecretKey = awsSecretKey;
this._awsAccessKey = awsAccessKey;
options = options || {};
this._storageType = options.storageType || 'STANDARD'; // standard storage type
this._acl = options.acl || 'private'; // objects are private on upload unless told otherwise
this._checkMD5 = (options.checkMD5 === undefined) ? true : options.checkMD5; // check the md5 on upload by default
};
S3.prototype = new EventEmitter;
S3.prototype.constructor = S3;
/*
* uploads the file (with path) to the bucket found in the host
*
* the host should either be:
* bucketname.s3.amazonaws.com
* or
* the whole host:port (no s3.amazonaws.com in the name)
*
*/
S3.prototype.uploadFileToBucket = function(host, fileNameWithPath){
try {
var awsSecretKey = this._awsSecretKey;
var awsAccessKey = this._awsAccessKey;
// capture instance in closure so we don't have to worry about 'this'
var s3 = this;
// check if file is valid... otherwise bail out.
fs.stat(fileNameWithPath, function(err, stats){
if (err) {
s3.emit('error', err);
return;
}
var fileSize = s3._getFileSize(stats);
var fileName = s3._getFileName(fileNameWithPath);
var mimeType = s3._getMimeType(fileNameWithPath);
s3._getBucketName(host, function(res){
var bucketName = res;
var resource = '/' + bucketName + '/' + fileName;
var headers = s3._getPutHeaders(host, mimeType, fileSize);
// the md5 is async so we continue when complete
var afterMD5Callback = function(hash){
if(hash){
headers['Content-MD5'] = hash;
}
// add the amazon authorization header
s3._addAuthorizationHeader(headers, 'PUT', resource, awsSecretKey, awsAccessKey);
// send the put request
s3._sendPutRequest(headers, fileNameWithPath);
};
// add md5 if set in options
if(s3._checkMD5 === true){
s3._getMD5OfMessage(s3, fileNameWithPath, fileSize, afterMD5Callback);
}
else{
afterMD5Callback();
}
});
});
}catch(err){
this.emit('error', err);
}
};
S3.prototype.getFileFromBucket = function(host, fileName, destinationFile){
try {
var awsSecretKey = this._awsSecretKey;
var awsAccessKey = this._awsAccessKey;
var s3 = this;
s3._getBucketName(host, function(res){
var bucketName = res;
var resource = '/' + bucketName + '/' + fileName;
var headers = s3._getGetHeaders(host);
// add the amazon authorization header
s3._addAuthorizationHeader(headers, 'GET', resource, awsSecretKey, awsAccessKey);
// send the get request
s3._sendGetRequest(headers, fileName, destinationFile);
});
}catch(err){
this.emit('error', err);
}
};
/*
* Opens the file and streams it over to Amazon after getting the 100 Continue.
*
* Because the file is streamed from disk to socket, memory consumption is minimal and efficient.
*
* Uses net streams instead of the http library so we can use 100 Continue (which isn't supported in
* node's http library as of this writing). A sketch of the exchange follows.
*
*/
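/*
* The exchange, roughly (response lines are illustrative):
*
*   client: PUT /photo.jpg HTTP/1.1 plus the signed headers, including Expect: 100-continue
*   server: HTTP/1.1 100 Continue    <- only now is the file body streamed from disk
*   server: HTTP/1.1 200 OK          <- once the last byte has been written
*/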
S3.prototype._sendPutRequest = function(headers, fileNameWithPath){
var s3 = this;
// currently just ipv4
dns.resolve4(headers.Host,function (err, addresses) {
if (err) {
s3.emit('error', err);
return;
}
// create a stream connection with the host
var stream = net.createConnection(80, addresses[0]);
// create a stream from the file we will send
var fileStream = s3._createReadFileStream(fileNameWithPath);
// don't pull anything from the file until we have a connection with the remote server
fileStream.pause();
var progressBar = s3._progressBar(s3, 'progress', headers['Content-Length']);
fileStream.on('data', function(data){
// if write returns false, pause reading from the file and wait for the buffered data to
// be sent across to the remote server
if(!stream.write(data)){
fileStream.pause();
}
// update progress and emit progress
progressBar(data.length);
});
fileStream.on('end', function(){ stream.end(); });
fileStream.on('error', function(ex){
// end both streams on error...
fileStream.destroy();
stream.destroy();
s3.emit('error', ex);
});
stream.on('drain', function(){ fileStream.resume(); });
stream.on('close', function(hadError){ if(!hadError){ s3.emit('complete'); } });
stream.on('end', function(){
stream.end();
});
stream.on('timeout', function(){ });
// make the connection, send headers
stream.on('connect', function(){
var headerString = "PUT " + '/' + s3._getFileName(fileNameWithPath) + " HTTP/1.1" + "\r\n";
for(var header in headers)
{
if (headers[header] !== '') {
headerString += header + ': ' + headers[header] + "\r\n";
}
}
stream.write(headerString += "\r\n");
});
stream.on('error', function(err){
fileStream.destroy();
stream.destroy();
s3.emit('error', err);
});
stream.on('data', function (data) {
var continueHeader = /100\s+continue/i;
var errorHeader = /400\s+Bad\s+Request/i;
//if 100 continue then stream the data to the server
if(continueHeader.test(data)){
fileStream.resume();
}
if(errorHeader.test(data)){
s3.emit('error', data);
fileStream.destroy();
stream.end();
return;
}
//todo: check for other errors...
});
});
};
/*
* Opens a file stream and streams the contents from Amazon into the file.
*/
S3.prototype._sendGetRequest = function(headers, fileName, destination){
var s3 = this;
var amazon = http.createClient(80, headers.Host);
var fileStream = s3._createWriteFileStream(destination);
if(!fileStream.writable){
s3.emit('error', 'non-writable stream');
return;
}
var request = amazon.request('GET', '/' + fileName, headers);
request.end();
request.on('response', function (response) {
if(response.statusCode === 200){
var length = response.headers['content-length'];
//create a progress bar so we know how much has been written
var progressBar = s3._progressBar(s3, 'progress', length);
response.on('data', function (chunk) {
if(!fileStream.write(chunk)){
response.pause();
}
progressBar(chunk.length);
});
response.on('end', function(){
fileStream.end();
s3.emit('complete');
});
fileStream.on('drain', function(){
response.resume();
});
fileStream.on('error', function(ex){
response.destroy();
fileStream.destroy();
s3.emit('error', ex);
});
}
else{
s3.emit('error', response.statusCode + JSON.stringify(response.headers));
}
});
};
// broken out so we can control in unit testing.
S3.prototype._getMimeType = function(fileName){
return mime.lookup(fileName);
};
// broken out so we can control in unit testing.
S3.prototype._getFileSize = function(stats){
return stats.size;
};
/* gets the bucket name from the host, e.g. 'mybucket.s3.amazonaws.com' -> 'mybucket' */
S3.prototype._getBucketName = function(host, callback){
host = host || '';
// if it is on amazonaws.com then the bucket is the first part
var regex = /(.*)s3\.amazonaws\.com$/i;
if(host.match(regex)){
//remove the trailing '.' if it is there
callback(RegExp.$1.replace(/\.$/, ''));
return;
}
//otherwise it's everything up to the port
regex = /(.*):?[0-9]*$/i;
if(host.match(regex)){
callback(RegExp.$1.replace(/:[0-9]*$/, ''));
return;
}
this.emit('error', 'Invalid host');
};
S3.prototype._addAuthorizationHeader = function(headers, method, resource, awsSecretKey, awsAccessKey){
//amazon canonicalized headers (described in comments below)
var canonicalizedAmzHeaders = this._getCanonicalizedAmzHeaders(headers);
// get the string to sign (see comments on the method)
var stringToSign = this._getStringToSign(headers, method, canonicalizedAmzHeaders, resource);
//sign the headers
var signatureOfHeaders = this._getSignature(awsSecretKey, stringToSign);
// set the amazon authorization header
headers.Authorization = this._getAuthorization(awsAccessKey, signatureOfHeaders);
};
// builds the string to sign from the headers
S3.prototype._getStringToSign = function(headers, verb, canonicalizedAmzHeaders, canonicalizedResource){
//create the string to sign using the syntax described in comments above
//make sure we have a date... otherwise create one.
var date = headers.Date || new Date().toUTCString();
//leave the date out of the string to sign if we have the x-amz-date header
var regex = /x-amz-date/;
if(regex.test(canonicalizedAmzHeaders))
{
date = '';
}
//make sure we have a content type
var contentType = headers['Content-Type'] || '';
//try with an md5
var md5 = headers['Content-MD5'] || '';
//return the string to sign
return verb + "\n" +
md5 + "\n" + // (optional)
contentType + "\n" + // (optional)
date + "\n" + // only include if no x-amz-date
canonicalizedAmzHeaders + // can be blank
canonicalizedResource;
};
// returns the authorization header
S3.prototype._getAuthorization = function(awsAccessKey, digest){
return 'AWS ' + awsAccessKey + ":" + digest;
};
// signs the "string to sign" using the algorithm defined by amazon
S3.prototype._getSignature = function(secretKey, stringToSign){
var hmac = crypto.createHmac('sha1', secretKey);
hmac.update(stringToSign);
return hmac.digest('base64');
};
S3.prototype._getMD5OfMessage = function(instance, file, fileLength, callback){
var hash = crypto.createHash('md5');
var fileStream = this._createReadFileStream(file);
//create a progress bar
var progress = this._progressBar(this, 'hashUpdate', fileLength);
fileStream.on('data', function(data){
hash.update(data);
progress(data.length);
});
fileStream.on('end', function(){
callback(hash.digest('base64'));
});
fileStream.on('error', function(ex){
instance.emit('error', ex);
});
};
S3.prototype._createReadFileStream = function(fileName){
// no encoding option: emit raw Buffers so binary files are not mangled
return fs.createReadStream(fileName, { 'flags': 'r'
, 'mode': 0666
, 'bufferSize': 4 * 1024
});
};
S3.prototype._createWriteFileStream = function(fileName){
// raw Buffers here too, for the same reason
return fs.createWriteStream(fileName, { 'flags': 'w'
, 'mode': 0666
});
};
S3.prototype._progressBar = function(instance, eventName, total){
var currentTotal = 0;
var lastPercentage = -1;
return function(update){
//increase by new amount
currentTotal += update;
// percentage of what's been sent
var totalSentPercentage = Math.floor((currentTotal / total) * 100);
if(totalSentPercentage != lastPercentage){
lastPercentage = totalSentPercentage;
instance.emit(eventName, lastPercentage);
}
};
};
/*
* canonicalizes Amazon's x-amz-* headers
*
* use an array as the value for a header that has more than one value (see the unit tests for an
* example with multiple authors); a worked example follows below
*
*/
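/*
* For example (hypothetical header values):
*
*   { 'x-amz-acl': 'private', 'X-Amz-Meta-Author': ['jane', 'john'] }
*
* canonicalizes (lower-cased, comma-joined, sorted) to:
*
*   "x-amz-acl:private\nx-amz-meta-author:jane,john\n"
*/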
S3.prototype._getCanonicalizedAmzHeaders = function(headers){
var canonicalizedHeaders = [];
var as3Header=/x-amz-/i;
for (var header in headers){
// pull out amazon headers
if(as3Header.test(header)){
var value = headers[header];
if(value instanceof Array)
{
value = value.join(',');
}
canonicalizedHeaders.push(header.toString().toLowerCase() + ':' + value);
}
}
var res = canonicalizedHeaders.sort().join('\n');
//end in new line if any
if(res){
res += '\n';
}
return res;
};
// returns the filename from the full path (anything after the last '/', or the whole string if there is no '/')
S3.prototype._getFileName = function(fileNameWithPath){
var indexOfForwardSlash = fileNameWithPath.lastIndexOf('/');
return (indexOfForwardSlash !== -1)? fileNameWithPath.substr(indexOfForwardSlash + 1) : fileNameWithPath;
};
// returns the headers for a put request
S3.prototype._getPutHeaders = function(host, mimeType, contentLength){
var instance = this;
return {
'Date': new Date().toUTCString(),
'Content-MD5': '', // filled in later when the checkMD5 option is enabled
'Host': host, // usually bucketName + '.s3.amazonaws.com',
'Content-Type': mimeType,
'Content-Length': contentLength,
'Expect': '100-continue',
'x-amz-storage-class': instance._storageType,
'x-amz-acl': instance._acl
};
};
S3.prototype._getGetHeaders = function(host){
var instance = this;
return {
'Date': new Date().toUTCString(),
'Host': host
};
};
// export the s3 library
exports.S3 = S3;
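// Example usage -- a sketch only: the keys, bucket host, and file path below are hypothetical placeholders.
// Runs only when this file is executed directly rather than require()d.
if (require.main === module) {
var s3 = new S3('YOUR_SECRET_KEY', 'YOUR_ACCESS_KEY', {
storageType: 'REDUCED_REDUNDANCY', // or 'STANDARD'
acl: 'public-read',
checkMD5: true
});
s3.on('progress', function(percentSent){ console.log('sent ' + percentSent + '%'); });
s3.on('complete', function(){ console.log('upload complete'); });
s3.on('error', function(err){ console.log('upload failed: ' + err); });
s3.uploadFileToBucket('mybucket.s3.amazonaws.com', '/tmp/photo.jpg');
}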