Created
June 9, 2015 05:29
-
-
Save allanlei/61550cb9e597cb22b564 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from twisted.web.server import Site | |
from twisted.web.resource import Resource | |
from twisted.internet import reactor | |
from largerequest import LargeRequest | |
import hashlib, os, time, json, shutil, socket, boto | |
# Override tempdir to place temp files in same FS as destination | |
# this allows for better performance using mv rather than copying | |
# or re-writing the file. | |
import tempfile | |
tempfile.tempdir = '/tmp' | |
class FormPage(Resource): | |
# where uploaded files are moved to. | |
basepath='/tmp/' | |
def log(self, logstr): | |
print '%f %s' % (time.time(), logstr) | |
def render_OPTIONS(self, request): | |
request.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') | |
allow_headers = request.getHeader('Access-Control-Allow-Headers') | |
if( isinstance(allow_headers, str) ): | |
request.setHeader('Access-Control-Allow-Headers', allow_headers) | |
return '.' | |
def render_GET(self, request): | |
return """ | |
<html> | |
<head><title>Upload :: %s</title></head> | |
<body> | |
<form method="POST" enctype="multipart/form-data"> | |
Text: <input name="dev" type="text" /><br /> | |
File: <input name="userfile" type="file" /><br /> | |
File: <input name="userfile" type="file" /><br /> | |
<input type="submit" /> | |
</form> | |
</body> | |
</html> | |
""" % socket.gethostname() | |
def render_POST(self, request): | |
self.log('render_POST begun.') | |
if 'userfile' not in request.args: | |
return 'Could not find uploaded file.' | |
results = [] | |
bsize = 4096*1024 | |
for i in range(len(request.args['userfile'])): | |
cur_file = request.args['userfile'][i] | |
cur_filename = request.args['userfile_filename'][i] | |
if not cur_filename: | |
continue | |
newfile = self.basepath + cur_filename | |
temp_type = request.temp_type.__name__ | |
# If a named temporary file is used we can reset the flag so that is it not | |
# deleted [except if we're on Windows] and then move it to its destination | |
# without re-writing. If the destination is on a different FS this is a | |
# moot point as it will be rewritten anyway | |
if temp_type and temp_type == 'NamedTemporaryFile' and os.name != 'nt': | |
self.log("Moving NamedTemporaryFile %s > %s" % (cur_file.name, newfile)) | |
cur_file.delete = False | |
cur_file.close() | |
shutil.move(cur_file.name,newfile) | |
self.log("Move Complete, calculating MD5") | |
fh = open(newfile, 'r') | |
f_hash = hashlib.md5() | |
while 1: | |
buff = fh.read(bsize) | |
if not buff: | |
break | |
f_hash.update(buff) | |
fh.close() | |
cur_hash = f_hash.hexdigest() | |
else: | |
# Copy file and calculate md5 | |
cur_file.seek(0,0) | |
fh = open(newfile, 'w') | |
f_hash = hashlib.md5() | |
while 1: | |
buff = cur_file.read(bsize) | |
if not buff: | |
break | |
fh.write(buff) | |
f_hash.update(buff) | |
fh.close() | |
cur_hash = f_hash.hexdigest() | |
# Get file size | |
cur_size = os.path.getsize(newfile) | |
results.append({'file':cur_filename, 'size':cur_size, 'hash':cur_hash}) | |
self.log("Complete: %s" % results) | |
return json.dumps(results) | |
# switch between twistd daemon mode and regular python invocation | |
# this would be automatic, but the guys in #twisted.web are too | |
# snooty to bestow upon me such a dirty secret. :I | |
in_twistd = False | |
if in_twistd: | |
use_port = 80 | |
else: | |
use_port = 8080 | |
from twisted.application import service, internet | |
root = Resource() | |
root.putChild("upload", FormPage()) | |
factory = Site(root) | |
factory.requestFactory = LargeRequest | |
factory.requestFactory.do_log = True | |
if in_twistd: | |
# 222:500 are default ec2-user UID:GID | |
application = service.Application('uploadhandler', uid=222, gid=500) | |
internet.TCPServer(use_port, factory).setServiceParent( | |
service.IServiceCollection(application)) | |
else: | |
reactor.listenTCP(use_port, factory) | |
reactor.run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile, time, cgi, mimetools, os | |
from twisted.web import server | |
from cgi import parse_header as _parseHeader | |
class LargeRequest(server.Request): | |
# max amount of memory to allow any ~single~ request argument [ie: POSTed file] | |
# to take up before being flushed into a temporary file. | |
# eg: 50 users uploading 4 large files could use up to [and in excess of] | |
# 200 times value specified below. | |
# note: this value seems to be taken with a grain of salt, memory usage may spike | |
# FAR above this value in some cases. | |
# eg: set the memory limit to 5 MB, write 2 blocks of 4MB, mem usage will | |
# have spiked to 8MB before the data is rolled to disk after the | |
# second write completes. | |
memorylimit = 1024*1024*25 | |
# type of tempfile to use. Spooled will be fastest for files/parts smaller than | |
# memorylimit defiend above, Named will be fastest for file uploads when you want | |
# to do: | |
# request.args['file'][0].delete = False | |
# request.args['file'][0].close() | |
# shutil.move(request.args['file'][0].name, new_location) | |
# where tempfile.tempdir is on the same filessystem as new_location | |
temp_type = staticmethod(tempfile.NamedTemporaryFile) | |
# enable/disable debug logging | |
do_log = False | |
# re-defined only for debug/logging purposes | |
def gotLength(self, length): | |
if self.do_log: | |
print '%f Headers received, Content-Length: %d' % (time.time(), length) | |
server.Request.gotLength(self, length) | |
# re-definition of twisted.web.server.Request.requestrecieved, the only difference | |
# is that self.parse_multipart() is used rather than cgi.parse_multipart() | |
def requestReceived(self, command, path, version): | |
if self.do_log: | |
print '%f Request Received' % time.time() | |
self.content.seek(0,0) | |
self.args = {} | |
self.stack = [] | |
self.method, self.uri = command, path | |
self.clientproto = version | |
x = self.uri.split(b'?', 1) | |
if len(x) == 1: | |
self.path = self.uri | |
else: | |
self.path, argstring = x | |
self.args = self.parse_qs(argstring, 1) | |
# cache the client and server information, we'll need this later to be | |
# serialized and sent with the request so CGIs will work remotely | |
self.client = self.channel.transport.getPeer() | |
self.host = self.channel.transport.getHost() | |
# Argument processing | |
args = self.args | |
ctype = self.requestHeaders.getRawHeaders(b'content-type') | |
if ctype is not None: | |
ctype = ctype[0] | |
if self.method == b"POST" and ctype: | |
mfd = b'multipart/form-data' | |
key, pdict = _parseHeader(ctype) | |
if key == b'application/x-www-form-urlencoded': | |
args.update(self.parse_qs(self.content.read(), 1)) | |
elif key == mfd: | |
try: | |
self.content.seek(0,0) | |
args.update(self.parse_multipart(self.content, pdict)) | |
#args.update(cgi.parse_multipart(self.content, pdict)) | |
except KeyError as e: | |
if e.args[0] == b'content-disposition': | |
# Parse_multipart can't cope with missing | |
# content-dispostion headers in multipart/form-data | |
# parts, so we catch the exception and tell the client | |
# it was a bad request. | |
self.channel.transport.write( | |
b"HTTP/1.1 400 Bad Request\r\n\r\n") | |
self.channel.transport.loseConnection() | |
return | |
raise | |
self.content.seek(0, 0) | |
self.process() | |
# re-definition of cgi.parse_multipart that uses a single temporary file to store | |
# data rather than storing 2 to 3 copies in various lists. | |
def parse_multipart(self, fp, pdict): | |
if self.do_log: | |
print '%f Parsing Multipart data: ' % time.time() | |
rewind = fp.tell() #save cursor | |
fp.seek(0,0) #reset cursor | |
boundary = "" | |
if 'boundary' in pdict: | |
boundary = pdict['boundary'] | |
if not cgi.valid_boundary(boundary): | |
raise ValueError, ('Invalid boundary in multipart form: %r' | |
% (boundary,)) | |
nextpart = "--" + boundary | |
lastpart = "--" + boundary + "--" | |
partdict = {} | |
terminator = "" | |
while terminator != lastpart: | |
c_bytes = -1 | |
if self.temp_type.__name__ == 'SpooledTemporaryFile': | |
data = self.temp_type(max_size=self.memorylimit) | |
else: | |
data = self.temp_type() | |
if terminator: | |
# At start of next part. Read headers first. | |
headers = mimetools.Message(fp) | |
clength = headers.getheader('content-length') | |
if clength: | |
try: | |
c_bytes = int(clength) | |
except ValueError: | |
pass | |
if c_bytes > 0: | |
data.write(fp.read(bytes)) | |
# Read lines until end of part. | |
while 1: | |
line = fp.readline() | |
if not line: | |
terminator = lastpart # End outer loop | |
break | |
if line[:2] == "--": | |
terminator = line.strip() | |
if terminator in (nextpart, lastpart): | |
break | |
data.write(line) | |
# Done with part. | |
if data.tell() == 0: | |
continue | |
if bytes < 0: | |
# if a Content-Length header was not supplied with the MIME part | |
# then the trailing line break must be removed. the var 'line' | |
# will still contain the last line written for reference. | |
if line[-2:] == "\r\n": | |
data.seek(-2, os.SEEK_END) | |
data.truncate() | |
elif line[-1:] == "\n": | |
data.seek(-1, os.SEEK_END) | |
data.truncate() | |
line = headers['content-disposition'] | |
if not line: | |
continue | |
key, params = cgi.parse_header(line) | |
if key != 'form-data': | |
continue | |
if 'name' in params: | |
name = params['name'] | |
# kludge in the filename | |
if 'filename' in params: | |
fname_index = name + '_filename' | |
if fname_index in partdict: | |
partdict[fname_index].append(params['filename']) | |
else: | |
partdict[name + '_filename'] = [params['filename']] | |
else: | |
# unnamed parts are not returned at all. | |
continue | |
if name in partdict: | |
data.seek(0,0) | |
partdict[name].append(data) | |
else: | |
data.seek(0,0) | |
partdict[name] = [data] | |
fp.seek(rewind) #restore cursor | |
return partdict |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment