Skip to content

Instantly share code, notes, and snippets.

@danielsoneg
Created October 29, 2020 00:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielsoneg/de6ae9cae1e308f0a766f6a31219ebc4 to your computer and use it in GitHub Desktop.
Save danielsoneg/de6ae9cae1e308f0a766f6a31219ebc4 to your computer and use it in GitHub Desktop.
capture.py: Capture HTTP requests for replay
"""capture.py: Capture HTTP requests for replay
This script provides a function that listens for an HTTP request on the given
address and port, captures that request, and generates a urllib Request object
by which the request can be replayed against a different server. This server
always responds with 200 and an empty body. Returning other responses is left
as an exercise for the reader.
This is designed to use just the python built-ins and to be useable from a
REPL.
Usage:
>>> import capture
>>> req = capture.capture_request("127.0.0.1", 8000)
------Elsewhere...-------
$ curl -H "MyHeader: MyValue" http://localhost:8000/path/path2\?query\=param -d '{"some":"payload"}'
------Back here...-------
>>> req
<capture.CapturedRequest object at 0x7f82fd127750>
>>> req.headers
{'Host': 'localhost:8000',
'User-agent': 'curl/7.64.1',
'Accept': '*/*',
'Myheader': 'MyValue',
'Content-length': '18',
'Content-type': 'application/x-www-form-urlencoded'
}
>>> req.full_url
'http://localhost:8000/path/path2?query=param'
>>> req.data
b'{"some":"payload"}'
>>> req.set_url("https://actual.site/target/path")
>>> req.full_url
'https://actual.site/target/path?query=param'
>>> import urllib
>>> import urllib.request
>>> urllib.request.urlopen(req)
"""
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import Request
from urllib.parse import urlsplit, urlunsplit
def capture_request(host, port):
    """Listen on ``(host, port)`` for one HTTP request and return it.

    Builds a CaptureServer for the given address and hands back whatever
    its ``capture_request()`` method returns.
    """
    server = CaptureServer((host, port))
    return server.capture_request()
class CapturedRequest(Request):
    """A urllib ``Request`` that can be re-pointed at another server."""

    def set_url(self, new_url):
        """Set target url for request replay.

        The scheme, host and path are taken from ``new_url``; the query
        string and fragment of the current URL are kept. The ``Host`` header
        is updated to the new host.

        Raises:
            ValueError: if ``new_url`` has no scheme. The check happens
                before anything is assigned, so a rejected URL leaves the
                request unchanged (assigning an unparseable ``full_url``
                would otherwise overwrite the stored URL before failing).
        """
        target = urlsplit(new_url)
        if not target.scheme:
            raise ValueError("unknown url type: %r" % (new_url,))

        current = urlsplit(self.full_url)
        self.full_url = urlunsplit(
            (
                target.scheme,
                target.netloc,
                target.path,
                current.query,
                current.fragment,
            )
        )
        self.headers["Host"] = target.netloc
class CaptureServer(HTTPServer):
    """HTTP server that handles a single request and remembers it.

    The handler stores the captured request on ``last_request``.
    """

    # Read by server_bind(); lets the port be re-bound right after a capture.
    allow_reuse_address = True

    def __init__(self, server_address):
        """Create the server socket for ``server_address`` without binding it."""
        self.last_request = None
        # bind_and_activate=False: binding is deferred to capture_request().
        super().__init__(server_address, CaptureRequestHandler, False)

    def capture_request(self):
        """Bind, handle exactly one request, and return what was captured.

        Returns:
            The request stored by the handler, or ``None`` if no request was
            captured (for example the request could not be parsed, or
            ``timeout`` was set and expired).

        The listening socket is always closed before returning or raising,
        so the port is released whether or not the capture succeeded.
        """
        self.last_request = None
        try:
            self.server_bind()
            self.server_activate()
            self.handle_request()
        finally:
            self.server_close()
        return self.last_request
class CaptureRequestHandler(BaseHTTPRequestHandler):
    """Request handler that records any request as a CapturedRequest.

    Every ``do_<METHOD>`` lookup resolves to ``do_request``, so all HTTP
    methods are handled the same way: the request is stored on
    ``self.server.last_request`` and answered with an empty 200.
    """

    def do_request(self):
        """Capture the current request and reply with 200 and no body."""
        body = self.rfile.read(self._content_length())
        req = CapturedRequest(
            self._request_url(),
            data=body,
            headers=dict(self.headers.items()),
            method=self.command,
        )
        self.server.last_request = req
        self.send_response_only(200)
        self.end_headers()
        return True

    def _content_length(self):
        """Return the declared body length, or 0 if it is unusable."""
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            print("Couldn't read content-length, proceding without capturing body")
            return 0
        # A negative length would make rfile.read() block until EOF.
        return max(length, 0)

    def _request_url(self):
        """Build the URL the client requested."""
        host = self.headers.get("Host")
        if not host:
            # No Host header (e.g. an HTTP/1.0 client): fall back to the
            # address this server is bound to.
            host = "{}:{}".format(*self.server.server_address[:2])
        # This doesn't support HTTPS, so assume HTTP
        return "http://" + host + self.path

    def __getattr__(self, name):
        """Route every ``do_*`` lookup to ``do_request``.

        Any other missing attribute must raise AttributeError. Returning
        None instead makes ``hasattr()`` true for every name, which breaks
        the base class's lazy creation of ``_headers_buffer``.
        """
        if name.startswith("do_"):
            return self.do_request
        raise AttributeError(
            "{!r} object has no attribute {!r}".format(type(self).__name__, name)
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment