Skip to content

Instantly share code, notes, and snippets.

@pankajp
Last active October 19, 2024 11:58
Show Gist options
  • Save pankajp/280596a5dabaeeceaaaa to your computer and use it in GitHub Desktop.
Save pankajp/280596a5dabaeeceaaaa to your computer and use it in GitHub Desktop.
Simple Python HTTP Server with multi-threading and partial-content support
#! /usr/bin/env python
# Standard library imports.
from SocketServer import ThreadingMixIn
import BaseHTTPServer
import SimpleHTTPServer
import sys
import json
import os
from os.path import (join, exists, dirname, abspath, isabs, sep, walk, splitext,
isdir, basename, expanduser, split, splitdrive)
from os import makedirs, unlink, getcwd, chdir, curdir, pardir, rename, fstat
from shutil import copyfileobj, copytree
import glob
from zipfile import ZipFile
from urlparse import urlparse, parse_qs
from urllib import urlopen, quote, unquote
from posixpath import normpath
from cStringIO import StringIO
import re
import ConfigParser
import cgi
import threading
import socket
import errno
# Default root directory for served files: the working directory of the
# process at the time this module is imported. RequestHandler.serve_path
# defaults to it and main() falls back to it when no path argument is given.
DATA_DIR = getcwd() # join(expanduser('~'), APP_NAME)
class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
    """HTTP server that handles every request in its own thread.

    ThreadingMixIn must come first in the base list so that its
    process_request() overrides the single-threaded one of HTTPServer.
    """

    # Worker threads are daemonic so that a client holding a connection open
    # (e.g. a stalled download) cannot keep the process alive after the main
    # thread has been interrupted.
    daemon_threads = True
class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """Static-file request handler with HTTP Range (partial content) support.

    GET requests without a usable ``Range`` header are delegated to
    SimpleHTTPRequestHandler. A single ``bytes=first-[last]`` range is
    answered with ``206 Partial Content``. Request paths are resolved below
    ``serve_path`` (see translate_path).
    """

    # Directory that request paths are mapped onto.
    serve_path = DATA_DIR

    # Matches a single "bytes=first-" or "bytes=first-last" range spec.
    _RANGE_RE = re.compile(r"^bytes=(\d+)\-(\d+)?")

    def do_GET(self):
        """ Overridden to handle HTTP Range requests. """
        self.range_from, self.range_to = self._get_range_header()
        if self.range_from is None:
            # No (usable) Range header: the stock implementation is enough.
            return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
        print('range request %s %s' % (self.range_from, self.range_to))
        f = self.send_range_head()
        if not f:
            return
        try:
            if self.range_from is None:
                # send_range_head() fell back to a complete (200) response,
                # e.g. a directory listing, so the whole body must be sent.
                self.copyfile(f, self.wfile)
            else:
                self.copy_file_range(f, self.wfile)
        finally:
            # Close even if the client disconnects in the middle of the copy.
            f.close()

    def copy_file_range(self, in_file, out_file):
        """ Copy only the range in self.range_from/to.

        Reads from ``in_file`` starting at ``self.range_from`` and writes to
        ``out_file`` until ``self.range_to`` (inclusive) or EOF is reached.
        Returns the number of bytes copied.
        """
        in_file.seek(self.range_from)
        # Add 1 because the range is inclusive
        bytes_to_copy = 1 + self.range_to - self.range_from
        buf_length = 64 * 1024
        bytes_copied = 0
        while bytes_copied < bytes_to_copy:
            read_buf = in_file.read(
                min(buf_length, bytes_to_copy - bytes_copied))
            if not read_buf:
                # EOF before the end of the requested range.
                break
            out_file.write(read_buf)
            bytes_copied += len(read_buf)
        return bytes_copied

    def send_range_head(self):
        """Send the status line and headers for a (possibly ranged) GET.

        Expects ``self.range_from`` / ``self.range_to`` to have been set from
        the Range header. ``self.range_to`` is clamped to the last byte of
        the file. When the response is a directory listing, both attributes
        are reset to None because the listing is always sent in full.

        A range that starts at or beyond the end of the file is answered with
        ``416`` and a ``Content-Range: bytes */<size>`` header.

        Return value is either a file object (which has to be copied to the
        output file by the caller and must be closed by the caller under all
        circumstances), or None, in which case the caller has nothing
        further to do.
        """
        path = self.translate_path(self.path)
        if isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = join(path, index)
                if exists(index):
                    path = index
                    break
            else:
                # The listing is generated on the fly and list_directory()
                # sends 200 with the full length, so drop the range to make
                # the caller copy the complete body.
                self.range_from = self.range_to = None
                return self.list_directory(path)
        if not exists(path) and path.endswith('/data'):
            # FIXME: Handle grits-like query with /data appended to path
            if exists(path[:-5]):
                path = path[:-5]
        ctype = self.guess_type(path)
        try:
            # Always read in binary mode. Opening files in text mode may cause
            # newline translations, making the actual size of the content
            # transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            fs = fstat(f.fileno())
            file_size = fs.st_size
            if self.range_from is not None and self.range_from >= file_size:
                # Nothing of the requested range overlaps the file.
                self.send_response(416)
                self.send_header("Content-Range", "bytes */%d" % file_size)
                self.send_header("Content-Length", "0")
                self.end_headers()
                f.close()
                return None
            if self.range_from is None:
                self.send_response(200)
            else:
                self.send_response(206)
            self.send_header("Content-type", ctype)
            if self.range_from is not None:
                if self.range_to is None or self.range_to >= file_size:
                    self.range_to = file_size - 1
                self.send_header("Content-Range",
                                 "bytes %d-%d/%d" % (self.range_from,
                                                     self.range_to,
                                                     file_size))
                # Add 1 because ranges are inclusive
                self.send_header("Content-Length",
                                 str(1 + self.range_to - self.range_from))
            else:
                self.send_header("Content-Length", str(file_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except Exception:
            # Do not leak the descriptor when stat or header writing fails.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error). In either case, the headers are sent, making the
        interface the same as for send_head().
        """
        try:
            names = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        names.sort(key=lambda a: a.lower())
        f = StringIO()
        displaypath = cgi.escape(unquote(self.path))
        f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
        f.write("<html>\n<title>Directory listing for %s</title>\n"
                % displaypath)
        f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
        f.write("<hr>\n<ul>\n")
        for name in names:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            f.write('<li><a href="%s">%s</a>\n'
                    % (quote(linkname), cgi.escape(displayname)))
        f.write("</ul>\n<hr>\n</body>\n</html>\n")
        length = f.tell()
        f.seek(0)
        self.send_response(200)
        encoding = sys.getfilesystemencoding()
        self.send_header("Content-type", "text/html; charset=%s" % encoding)
        self.send_header("Content-Length", str(length))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Map a request path onto the filesystem below ``serve_path``.

        Query string and fragment are dropped, the path is unquoted and
        normalised, and drive letters, directory prefixes and ``.``/``..``
        components are removed from every path component.
        """
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = normpath(unquote(path))
        words = [word for word in path.split('/') if word]
        path = self.serve_path
        for word in words:
            drive, word = splitdrive(word)
            head, word = split(word)
            if word in (curdir, pardir):
                continue
            path = join(path, word)
        return path

    # Private interface ######################################################

    def _get_range_header(self):
        """ Returns request Range start and end if specified.

        Returns ``(start, end)`` for ``bytes=start-end`` and ``(start, None)``
        for an open-ended ``bytes=start-``. Returns ``(None, None)`` if the
        Range header is missing, uses another unit, cannot be parsed, or has
        an end position smaller than its start position (such a header is
        ignored and the full content is served).
        """
        range_header = self.headers.getheader("Range")
        if range_header is None:
            return (None, None)
        if not range_header.startswith("bytes="):
            print("Not implemented: parsing header Range: %s" % range_header)
            return (None, None)
        match = self._RANGE_RE.search(range_header)
        if not match:
            print('CANNOT PARSE RANGE HEADER: %s' % range_header)
            return (None, None)
        from_val = int(match.group(1))
        if match.group(2) is None:
            return (from_val, None)
        to_val = int(match.group(2))
        if to_val < from_val:
            # An inverted range is invalid; ignoring the header avoids a
            # negative Content-Length further down.
            print('Ignoring invalid Range header: %s' % range_header)
            return (None, None)
        return (from_val, to_val)
def get_server(port=8000, next_attempts=0, serve_path=None):
    """Create a ThreadingHTTPServer bound to the first available port.

    ``port`` is tried first on all interfaces. Every time binding fails with
    EADDRINUSE the next higher port is tried, at most ``next_attempts``
    additional times.

    If ``serve_path`` is truthy it is stored on ``RequestHandler.serve_path``.
    That is a class attribute, so it applies to every server using
    RequestHandler in this process.

    Returns the bound server. Raises socket.error if binding fails for a
    reason other than EADDRINUSE, or with EADDRINUSE once all attempts are
    used up (instead of silently returning None).
    """
    if serve_path:
        RequestHandler.serve_path = serve_path
    while True:
        try:
            return ThreadingHTTPServer(("", port), RequestHandler)
        except socket.error as e:
            if e.errno != errno.EADDRINUSE or next_attempts <= 0:
                raise
            next_attempts -= 1
            port += 1
def main(args=None):
    """Run the file server until interrupted.

    ``args`` defaults to ``sys.argv[1:]``. The last argument, if present, is
    the port (default 8000); the one before it, if present, is the directory
    to serve (default DATA_DIR).
    """
    if args is None:
        args = sys.argv[1:]
    port = 8000
    if args:
        port = int(args[-1])
    serve_path = DATA_DIR
    if len(args) > 1:
        serve_path = abspath(args[-2])

    httpd = get_server(port=port, serve_path=serve_path)

    # Report the port the socket is really bound to; it differs from the
    # requested one when port 0 was passed.
    print("serving at port %s" % httpd.server_address[1])
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket, also on Ctrl-C.
        httpd.server_close()


if __name__ == "__main__":
    main()
@tforgione
Copy link

tforgione commented Nov 4, 2016

Very nice work, and very useful to have a simple http server that supports partial content request (which I use a lot), thanks for this.

However, please correct me if I'm wrong, but I think there is a mistake at line 59 : maybe you want something like

read_buf = in_file.read(min(buf_length, left_to_copy - bytes_copied))

since left_to_copy doesn't change during the loop, the quantity that is still to copy is left_to_copy - bytes_copied.

@pankajp
Copy link
Author

pankajp commented Dec 15, 2017

Thanks a lot, I have fixed it now.

@hosseinzoda
Copy link

I've made a fork with support of partial content for HEAD requests.
https://gist.github.com/hosseinamin/cdaa16cf7361e678b07cc2cb118e0482

@ZeppLu
Copy link

ZeppLu commented Apr 19, 2019

if you want ipv6 support, simply change

class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
    pass

to

class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
    address_family = socket.AF_INET6

for lazy guys, just check out my fork

@sbguy01
Copy link

sbguy01 commented Oct 16, 2020

Trying to turn my single threaded HTTP server into a multi-thread one on Windows Server. I found this project and used it as a guide. I'm not quite sure how multi-thread HTTPserver is supposed to behave when running inside a single command prompt window. I bombarded my server with 2 clients and whenever one client has a large request, the other client waits until the first client is done. I'm using python 3.8.

Thanks!

@pankajp
Copy link
Author

pankajp commented Oct 16, 2020

@sbguy01 The basic requirement for multithreading is to have ThreadingMixIn as the first base class of your HTTP server class. You can try adding some debug logging to see where it is getting stuck, and whether it is starting multiple threads to handle the requests. Without looking at the code it is difficult to figure out what the problem might be.

@sbguy01
Copy link

sbguy01 commented Oct 16, 2020

hi pankajp! Since python 3.7, multithreaded http server is supposedly very easy.

The server is just a data retriever from some external source. When a client calls for a doLargeTask, I can see that the server pauses in the Command Prompt window waiting for the task to finish, and the other client calling for doSmallTask, just waits until the large task is done.

Here is my pseudo-code. What am I missing? Many thanks!

from http.server import HTTPServer, BaseHTTPRequestHandler, ThreadingHTTPServer
from socketserver import ThreadingMixIn

class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    pass

class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
	def do_GET(self):
        
		self.send_response(200)
		self.end_headers()
        
          if A: 
		resp = doSmallTask()
                resp = resp.encode('utf-8')
                self.wfile.write(resp)
          if B:
		resp = doLargeTask()
                resp = resp.encode('utf-8')
                self.wfile.write(resp)


	 
httpd = ThreadingHTTPServer(('localhost', 8000), SimpleHTTPRequestHandler)
httpd.serve_forever()

@RaSan147
Copy link

is that python 2 or 3?
print is a statement here, so is it OK to merge things from here into the latest Python http.server.py file?

@Lennie
Copy link

Lennie commented Mar 28, 2023

Looks like this would be a Python 3 version:

--- orig_test.py	2023-03-28 14:07:01.541561698 +0200
+++ test.py	2023-03-28 15:18:55.905960179 +0200
@@ -1,36 +1,39 @@
 #! /usr/bin/env python
 
 # Standard library imports.
-from SocketServer import ThreadingMixIn
-import BaseHTTPServer
-import SimpleHTTPServer
+from socketserver import ThreadingMixIn
+from http.server import SimpleHTTPRequestHandler
+import http.server
+from http.server import BaseHTTPRequestHandler, HTTPServer
+#import SimpleHTTPServer
 import sys
 import json
 import os
-from os.path import (join, exists, dirname, abspath, isabs, sep, walk, splitext,
+from os.path import (join, exists, dirname, abspath, isabs, sep, splitext,
     isdir, basename, expanduser, split, splitdrive)
 from os import makedirs, unlink, getcwd, chdir, curdir, pardir, rename, fstat
 from shutil import copyfileobj, copytree
 import glob
 from zipfile import ZipFile
-from urlparse import urlparse, parse_qs
-from urllib import urlopen, quote, unquote
+from urllib.parse import urlparse, parse_qs, unquote, quote
 from posixpath import normpath
-from cStringIO import StringIO
+from io import StringIO
+from io import BytesIO
 import re
-import ConfigParser
-import cgi
+import configparser
+#import cgi
+import html
 import threading
 import socket
 import errno
 
 DATA_DIR = getcwd() # join(expanduser('~'), APP_NAME)
 
-class ThreadingHTTPServer(ThreadingMixIn, BaseHTTPServer.HTTPServer):
+class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
     pass
 
 
-class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
+class RequestHandler(SimpleHTTPRequestHandler):
     """ Handler to handle POST requests for actions.
     """
 
@@ -41,8 +44,8 @@
         self.range_from, self.range_to = self._get_range_header()
         if self.range_from is None:
             # nothing to do here
-            return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
-        print 'range request', self.range_from, self.range_to
+            return SimpleHTTPRequestHandler.do_GET(self)
+        print ('range request', self.range_from, self.range_to)
         f = self.send_range_head()
         if f:
             self.copy_file_range(f, self.wfile)
@@ -145,12 +148,18 @@
             self.send_error(404, "No permission to list directory")
             return None
         list.sort(key=lambda a: a.lower())
-        f = StringIO()
-        displaypath = cgi.escape(unquote(self.path))
-        f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
-        f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
-        f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
-        f.write("<hr>\n<ul>\n")
+        f = BytesIO()
+        displaypath = html.escape(unquote(self.path))
+        f.write(b'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
+        #f.write(b"<html>\n<title>Directory listing for %s</title>\n" % displaypath)
+        f.write(b"<html>\n<title>Directory listing for ")
+        f.write(displaypath.encode())
+        f.write(b"</title>\n")
+        #f.write(b"<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
+        f.write(b"<body>\n<h2>Directory listing for ")
+        f.write(displaypath.encode())
+        f.write(b"</h2>\n")
+        f.write(b"<hr>\n<ul>\n")
         for name in list:
             fullname = os.path.join(path, name)
             displayname = linkname = name
@@ -161,9 +170,14 @@
             if os.path.islink(fullname):
                 displayname = name + "@"
                 # Note: a link to a directory displays with @ and links with /
-            f.write('<li><a href="%s">%s</a>\n'
-                    % (quote(linkname), cgi.escape(displayname)))
-        f.write("</ul>\n<hr>\n</body>\n</html>\n")
+            f.write(b'<li><a href="')
+            f.write(quote(linkname).encode())
+            f.write(b'">')
+            f.write(html.escape(displayname).encode())
+            f.write(b'</a>\n')
+            #f.write(b'<li><a href="%s">%s</a>\n'
+            #        % (quote(linkname), html.escape(displayname)))
+        f.write(b"</ul>\n<hr>\n</body>\n</html>\n")
         length = f.tell()
         f.seek(0)
         self.send_response(200)
@@ -195,11 +209,11 @@
         """ Returns request Range start and end if specified.
         If Range header is not specified returns (None, None)
         """
-        range_header = self.headers.getheader("Range")
+        range_header = self.headers.get("Range")
         if range_header is None:
             return (None, None)
         if not range_header.startswith("bytes="):
-            print "Not implemented: parsing header Range: %s" % range_header
+            print ("Not implemented: parsing header Range: %s" % range_header)
             return (None, None)
         regex = re.compile(r"^bytes=(\d+)\-(\d+)?")
         rangething = regex.search(range_header)
@@ -210,7 +224,7 @@
             else:
                 return (from_val, None)
         else:
-            print 'CANNOT PARSE RANGE HEADER:', range_header
+            print ('CANNOT PARSE RANGE HEADER:', range_header)
             return (None, None)
 
 
@@ -242,8 +256,9 @@
 
     httpd = get_server(port=PORT, serve_path=serve_path)
 
-    print "serving at port", PORT
+    print ("serving at port", PORT)
     httpd.serve_forever()
 
 if __name__ == "__main__" :
     main()

@RaSan147
Copy link

Looks like this would be a Python 3 version:

Nevermind, i managed to make a full blown file server using python 3 http.server and these features
https://github.com/RaSan147/pyrobox

@Lennie
Copy link

Lennie commented Mar 31, 2023

Thanks, that seems useful, though maybe it actually has too many features. I needed something simple. Maybe I'll send a pull request to make it possible to turn off a bunch of features.

@RaSan147
Copy link

Thanks, that seems useful, though maybe it actually has too many features. I needed something simple. Maybe I'll send a pull request to make it possible to turn off a bunch of features.

That's a pretty good idea. I'll add a flag --no-script, which will remove all scripts (and functions), leaving just bare-bones download (you can already try it by disabling JS in the browser), --no-upload to disable upload, and some other things.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment