Skip to content

Instantly share code, notes, and snippets.

@boldfield
Last active March 8, 2016 16:57
Show Gist options
  • Save boldfield/66561b4f875511c053f2 to your computer and use it in GitHub Desktop.
Save boldfield/66561b4f875511c053f2 to your computer and use it in GitHub Desktop.
Simple PoC for chunked client side uploads to S3

Very basic PoC for uploading files to S3 client-side using presigned URLs provided by the AWS Ruby SDK.

// Handles the upload form's submit event: collects the form values, asks the
// server (POST /upload) for presigned chunk URLs, then starts a chunked upload.
function handleSubmit(event) {
  event.preventDefault();
  var el = $(this); // `var` added: the original assigned to an implicit global
  var file = el[0][2].files[0]; // third form control is the file input
  var data = {
    // Fall back to a random dataset id / default chunk size when fields are blank
    dataset: el[0][0].value || Math.floor(Math.random() * 100 + 1),
    chunk_size: el[0][1].value || 1024,
    size: file.size,
    file: file.name
  };
  // Invoked with the server's JSON response (per-chunk presigned URLs).
  function chunkedUpload(data, stat) {
    var uploader = new ChunkedUploader(file, data);
    uploader.onUploadComplete = function() {
      var meta_url = uploader.meta.metadata.upload_url;
      // PUT the accumulated chunk URIs to the metadata bucket's presigned URL.
      $.ajax({
        type: "PUT",
        url: meta_url,
        data: JSON.stringify(uploader.upload_metadata),
        success: function() {
          alert("DONE!");
        }
      });
    };
    // Exposed globally because _onChunkComplete recovers the uploader via
    // window.uploader (the XHR onload handler is attached unbound).
    window.uploader = uploader;
    uploader.start();
  }
  $.ajax({
    type: "POST",
    url: "/upload",
    data: data,
    success: chunkedUpload
  });
}
// Wire the upload form's submit handler once the DOM is ready
// ($(fn) is the jQuery shorthand for $(document).ready(fn)).
$(function () {
  $("#upload-data").submit(handleSubmit);
});
// Adapted from: http://creativejs.com/tutorials/advanced-uploading-techniques-part-1/
// Number of own enumerable properties on `obj`.
// The hand-rolled for-in/hasOwnProperty loop is replaced with the standard
// Object.keys, which enumerates exactly the own enumerable string keys.
// Patching Object itself is kept only for compatibility with existing callers.
Object.size = function(obj) {
  return Object.keys(obj).length;
};
// Uploads `file` in sequential chunks to the presigned URLs listed in
// `uploadMetadata.chunks` (the JSON payload returned by POST /upload).
function ChunkedUploader(file, uploadMetadata, options) {
  // Guard against being called without `new`.
  // Fixed two bugs: the original `!this instanceof ChunkedUploader` parses as
  // `(!this) instanceof ChunkedUploader` and is always false, and the
  // re-invocation passed `options` where `uploadMetadata` belongs.
  if (!(this instanceof ChunkedUploader)) {
    return new ChunkedUploader(file, uploadMetadata, options);
  }
  this.file = file;
  this.options = options;
  this.meta = uploadMetadata;
  this.num_chunks = Object.size(uploadMetadata.chunks);
  this.chunk_num = 0;
  this.file_size = this.file.size;
  // Chunk size is derived from how many presigned URLs the server handed out.
  this.chunk_size = Math.ceil(this.file_size / this.num_chunks);
  console.log("Chunk Count: " + this.num_chunks);
  console.log("Chunk Size: " + this.chunk_size);
  console.log("File Size: " + this.file_size);
  this.range_start = 0;
  this.range_end = this.chunk_size;
  // Metadata document PUT to the metadata bucket once every chunk is uploaded.
  this.upload_metadata = {
    domain: this.meta.domain,
    id: this.meta.uid,
    source_file: this.meta.file,
    dataset: this.meta.dataset,
    chunks: []
  };
  // Pick the vendor-prefixed Blob.slice variant where the browser needs it.
  if ('mozSlice' in this.file) {
    this.slice_method = 'mozSlice';
  } else if ('webkitSlice' in this.file) {
    this.slice_method = 'webkitSlice';
  } else {
    this.slice_method = 'slice';
  }
  this.upload_request = new XMLHttpRequest();
  // NOTE(review): onload is attached unbound, so _onChunkComplete cannot use
  // `this`; it reads window.uploader instead — confirm before running more
  // than one uploader at a time.
  this.upload_request.onload = this._onChunkComplete;
}
// Upload state machine shared by all ChunkedUploader instances.
ChunkedUploader.prototype = {
// Slice the current [range_start, range_end) byte window out of the file and
// PUT it to the current chunk's presigned URL. Advancing to the next chunk
// happens in _onChunkComplete when the XHR finishes.
_upload: function() {
var self = this,
urls = self.meta.chunks[self.chunk_num],
chunk;
// Slight timeout needed here (File read / AJAX readystate conflict?)
setTimeout(function() {
// Prevent range overflow
if (self.range_end > self.file_size) {
self.range_end = self.file_size;
}
chunk = self.file[self.slice_method](self.range_start, self.range_end);
self.upload_request.open('PUT', urls.upload_url, true);
// Record this chunk's S3 URI so the final metadata document lists every part.
self.upload_metadata.chunks.push(urls.uri);
self.upload_request.overrideMimeType('application/octet-stream');
self.upload_request.send(chunk);
}, 20);
},
// XHR onload handler. It was attached unbound in the constructor, so `this`
// is the XMLHttpRequest here; the uploader is recovered via the global below.
_onChunkComplete: function() {
// so, so, so very ugly
var self = window.uploader;
// If the end range is already the same size as our file, we
// can assume that our last chunk has been processed and exit
// out of the function.
if (self.range_end === self.file_size) {
self.onUploadComplete()
return;
}
// Update our ranges
self.range_start = self.range_end;
self.range_end = self.range_start + self.chunk_size;
self.chunk_num += 1;
// Continue as long as we aren't paused
if (!self.is_paused) {
self._upload();
}
},
// Begin uploading from the first chunk.
start: function() {
this._upload();
},
// Stop after the in-flight chunk completes (flag checked in _onChunkComplete).
pause: function() {
this.is_paused = true;
},
// Clear the pause flag and immediately upload the next chunk.
resume: function() {
this.is_paused = false;
this._upload();
}
};
<!-- Upload form page. Field order matters: index.js reads the form controls
     positionally (dataset id, chunk size, file input). -->
<html>
<head>
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.js"></script>
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script>
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap-theme.min.css">
  <script src="/index.js"></script>
  <script src="/uploader.js"></script>
</head>
<body>
  <div class="container">
    <h1 class="bd-title" id="content">Data data, gimmy data!</h1>
    <div class="col-md-9">
      <form id="upload-data">
        <div class="form-group row">
          <label for="datasetId" class="col-sm-2 form-control-label">Dataset ID</label>
          <div class="col-sm-10">
            <textarea class="form-control" id="datasetId" rows="1"></textarea>
          </div>
        </div>
        <div class="form-group row">
          <label for="chunkSize" class="col-sm-2 form-control-label">Chunk Size</label>
          <div class="col-sm-10">
            <textarea class="form-control" id="chunkSize" placeholder="1024" rows="1"></textarea>
          </div>
        </div>
        <div class="form-group row">
          <!-- Fixed: label previously pointed at "exampleInputFile", which does
               not exist; it must reference the file input's id. -->
          <label for="dataFile" class="col-sm-2 form-control-label">Data File</label>
          <div class="col-sm-10">
            <input type="file" class="form-control-file" id="dataFile">
          </div>
        </div>
        <div class="form-group row">
          <div class="col-sm-offset-2 col-sm-10">
            <button type="submit" class="btn btn-secondary">Begin Upload</button>
          </div>
        </div>
      </form>
    </div>
  </div>
</body>
</html>
require 'sinatra'
require 'aws-sdk-resources'
require 'net/http'
require 'securerandom'
# Deployment configuration. String constants are frozen so they cannot be
# mutated in place elsewhere in the app.
AWS_REGION = 'us-west-2'.freeze
BUCKET_NAME = 'test-buckets-r-us'.freeze
METADATA_BUCKET_NAME = 'test-buckets-r-us-metadata'.freeze # Bucket set up for lambda notifications
# Default chunk size (bytes) used when the client omits `chunk_size`.
CHUNK_SIZE = 1024
# Serve the upload form (views/index.erb).
get('/') { erb :index }
# Accepts upload parameters (dataset, file, size, chunk_size) and responds with
# JSON containing one presigned S3 PUT URL per chunk plus a presigned URL for
# the final metadata document written to the lambda-notification bucket.
post '/upload' do
  content_type :json
  id = SecureRandom.uuid.to_s
  key_root = "#{request.host.strip}/#{params['dataset']}/#{id}"
  file_name = params['file']
  fname_parts = file_name.split('.')
  file_prefix = fname_parts[0..-2].join('.')
  file_suffix = fname_parts[-1]
  file_size = params['size'].to_i
  chunk_size = (params['chunk_size'] || CHUNK_SIZE).to_i
  puts file_size  # debug output
  puts chunk_size # debug output
  # Ceil division fixes an off-by-one: the original (0..(file_size / chunk_size))
  # produced one extra chunk URL whenever file_size was an exact multiple of
  # chunk_size. Always allocate at least one chunk (zero-byte / tiny files).
  num_chunks = [(file_size.to_f / chunk_size).ceil, 1].max
  fnames = (0...num_chunks).map { |i| "#{file_prefix}-chunk-#{i}.#{file_suffix}" }
  s3 = Aws::S3::Resource.new(region: AWS_REGION)
  data = {
    'file' => file_name,
    'dataset' => params['dataset'],
    'uid' => id,
    'domain' => request.host.strip,
    'chunks' => [],
    'metadata' => {}
  }
  # One presigned PUT URL per chunk; the client uploads directly to S3.
  fnames.each do |fname|
    obj = s3.bucket(BUCKET_NAME).object("#{key_root}/#{fname}")
    data['chunks'] << {
      'upload_url' => obj.presigned_url(:put),
      'uri' => "s3://#{BUCKET_NAME}/#{key_root}/#{fname}"
    }
  end
  # Presign the metadata document's destination in the notification bucket.
  obj = s3.bucket(METADATA_BUCKET_NAME).object("#{key_root}.json")
  data['metadata']['upload_url'] = obj.presigned_url(:put)
  data['metadata']['uri'] = "s3://#{METADATA_BUCKET_NAME}/#{key_root}.json"
  data.to_json
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment