Skip to content

Instantly share code, notes, and snippets.

@hcooper
Created November 9, 2011 19:36
Show Gist options
  • Save hcooper/1352672 to your computer and use it in GitHub Desktop.
Save hcooper/1352672 to your computer and use it in GitHub Desktop.
A simple threaded caching HTTP proxy
#!/usr/bin/python
"""
A simple threaded caching HTTP proxy.
Hereward Cooper <coops@fawk.eu>
v0.1
"""
# TCP port the proxy listens on.
LISTENPORT = 8000
# Interface address the proxy binds to.
LISTENINT = '127.0.0.1'
# Base URL of the upstream server; the request path is appended to it.
REMOTEHOST = 'http://remote.server:80'
# Directory under which cached response bodies are stored.
CACHEDIR = './cache'
# File to which one cache hit/miss line is appended per request.
LOGFILE = './proxy.log'
import BaseHTTPServer
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
import hashlib
import os
import urllib2
from SocketServer import ThreadingMixIn
import threading
import sys
class CacheHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve GET requests from an on-disk cache, filling misses from REMOTEHOST.

    Entries live under CACHEDIR and are keyed by the MD5 hex digest of the
    request path.  Only the response body is cached; upstream headers are
    not stored or replayed.
    """

    def do_GET(self):
        """Answer a GET from the cache, fetching from REMOTEHOST on a miss.

        One hit/miss line is appended to LOGFILE per request.  When the
        upstream fetch fails, the upstream HTTP status (or 502 if the server
        could not be reached) is sent to the client and nothing is cached.
        """
        # Hash the request path to get a fixed-length, filesystem-safe key.
        hexname = hashlib.md5(self.path).hexdigest()

        # Spread cache across subdirectories (for performance).
        path = CACHEDIR + "/" + hexname[:2]
        fullpath = path + "/" + hexname

        if os.path.exists(fullpath):
            self._log_cache_event("CACHE HIT")
            # Entries are written in binary mode, so read them back the same
            # way to avoid newline translation corrupting the body.
            with open(fullpath, 'rb') as cached:
                data = cached.read()
        else:
            self._log_cache_event("CACHE MISS")
            try:
                upstream = urllib2.urlopen(REMOTEHOST + self.path)
                try:
                    data = upstream.read()
                finally:
                    upstream.close()
            except urllib2.HTTPError as err:
                # HTTPError subclasses URLError, so it must be handled first.
                self.send_error(err.code)
                return
            except urllib2.URLError:
                self.send_error(502)
                return

            # Another handler thread may create the same subdirectory between
            # a check and the call, so create first and tolerate "exists".
            try:
                os.makedirs(path)
            except OSError:
                if not os.path.isdir(path):
                    raise

            # NOTE(review): the entry is written in place, so a concurrent
            # request for the same path could observe a partial file.
            with open(fullpath, 'wb') as cached:
                cached.write(data)

        # Return HTTP headers and data.
        self.send_response(200)
        self.end_headers()
        self.wfile.write(data)

    def _log_cache_event(self, outcome):
        """Append one line tagged with `outcome` for this request to LOGFILE."""
        with open(LOGFILE, "a") as log:
            log.write("%s - - [%s] [%s] %s\n" %
                      (self.address_string(),
                       outcome,
                       self.log_date_time_string(),
                       self.path))

    def log_message(self, format, *args):
        """Disable the default logging setup as we use our own."""
        return
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer variant that hands each incoming request to its own thread."""
    pass
def run():
    """Start the caching proxy and serve requests until interrupted.

    Binds a ThreadedHTTPServer to (LISTENINT, LISTENPORT), prints a startup
    banner, then blocks in serve_forever().  Ctrl-C ends the loop, and the
    listening socket is closed on the way out.
    """
    server_address = (LISTENINT, LISTENPORT)
    httpd = ThreadedHTTPServer(server_address, CacheHandler)
    print('Starting proxy on port ' + str(LISTENPORT) + ', use <Ctrl-C> to stop')
    print('Log file: ' + LOGFILE)
    print('Cache directory: ' + CACHEDIR)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # The banner advertises Ctrl-C as the way to stop, so exit quietly
        # instead of dumping a traceback.
        pass
    finally:
        httpd.server_close()
# Start the proxy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment