mnot/test_http_conns.py

## test_http_conns.py
#!/usr/bin/env python
"""
test_http_conns.py - Tests for how HTTP persistent connections are handled,
including pipelining.

This script will set up a server which summarises how a browser connecting
to it behaves. For example, Firefox with pipelining turned on will give:

--->8---

~> ./test_http_conns.py 8001
PID: 11458
Point your browser at port 8001.
  request-uri: /
  request-uri: /image?1
[...]
  request-uri: /image?27
^C
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
conn 1 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
conn 2 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
conn 3 - 6 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
conn 4 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
conn 5 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
conn 6 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)

---8<---

Requires:
  * Twisted - <http://twistedmatrix.com/>
"""

__author__ = "Mark Nottingham <mnot@mnot.net>"
__copyright__ = """\
Copyright (c) 2008 Mark Nottingham

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""


import re
import sys
import time
from urllib import urlopen

from twisted.internet import protocol
from twisted.protocols import basic

# Use the epoll-based reactor when this Twisted installation provides one.
# install() is called before the `reactor` import below, presumably so that
# the import picks up the epoll reactor rather than the default one.
try:
    from twisted.internet import epollreactor
    epollreactor.install()
except ImportError:
    # No epollreactor module available: carry on with the default reactor.
    pass

from twisted.internet import reactor

# Where to get the image from. ConnectionServer downloads it once, while its
# class body is executed, and serves the bytes for every /image request.
image_url = "http://www.mnot.net/photo/2006/06/edinburgh/Thumbs/_DSC0029.jpg"

# How long to delay before processing requests; useful if you're testing
# on the same host and the client doesn't get a chance to pipeline.
# Passed as the delay argument of reactor.callLater().
delay = 0.1

# A single line ending, with or without the CR; splits a request into lines.
CRLF = re.compile(r"\r?\n", re.M)
# Two consecutive line endings, i.e. the blank line that ends a request.
MSG_DELIMITER = re.compile(r"\r?\n\r?\n", re.M)

class ConnectionServer(basic.LineReceiver):
    """
    Bare-bones HTTP server protocol that records how each connection is used.

    Every request is assumed to be a GET without a body and is answered with
    a canned response from `responses`. One instance exists per connection;
    the per-connection statistics live in the class-level `connections` dict
    so that they are still available after the connection has closed.
    """

    # A false line_mode makes LineReceiver hand incoming data to
    # rawDataReceived() instead of splitting it into lines.
    line_mode = 0
    # Statistics for every connection seen so far, keyed by instance `id`.
    connections = {}
    # Number of connections that are currently open.
    conn_count = 0
    # Last connection id handed out by __init__.
    _last_id = 0
    # Canned responses: request path -> (status code, status phrase, body,
    # content type). NOTE: the image is downloaded while this class body is
    # executed, i.e. as soon as the module is loaded.
    responses = {
        '/':            (200, "OK", "OK", "text/plain"),
        '/error/404':   (404, "Not Found", "sorry, not found", "text/plain"),
        '/error/500':   (500, "Internal Server Error", "sorry", "text/plain"),
        '/image':       (200, "OK", urlopen(image_url).read(), "image/jpeg"),
    }

    def __init__(self, root_body=None, root_type="text/html"):
        """
        Set up the per-connection buffers and give the connection an id.

        root_body -- optional replacement body for "/". It is stored in the
                     class-level `responses` dict, so it applies to every
                     connection, not only this one.
        root_type -- content type sent with root_body.
        """
        if root_body:
            self.responses['/'] = (200, "OK", root_body, root_type)
        # Bytes received that do not yet form a complete request.
        self.input = ""
        # Complete requests waiting to be answered, oldest first.
        self.request_queue = []
        # Records in `connections` outlive their connection, whereas id(self)
        # is only unique among objects that are alive at the same time, so a
        # later connection could collide with the record of a closed one.
        # A counter never repeats.
        ConnectionServer._last_id += 1
        self.id = ConnectionServer._last_id

    def rawDataReceived(self, data):
        """
        Buffer incoming data and queue every complete request found in it.

        A request is complete once the blank line ending its headers has
        arrived. Each complete request is processed by process_request()
        after `delay` seconds, which gives the client time to pipeline more.
        """
        self.input += data
        pieces = MSG_DELIMITER.split(self.input)
        # The last piece follows the final delimiter (or is the whole buffer
        # when no delimiter has arrived yet), so it is not a complete request:
        # keep it buffered until the rest of it shows up.
        self.input = pieces.pop()
        for request in pieces:
            if not request:
                continue
            self.request_queue.append(request)
            reactor.callLater(delay, self.process_request)

    def process_request(self):
        """
        Answer the oldest queued request and update this connection's stats.

        Scheduled once per queued request by rawDataReceived(). Records the
        deepest backlog seen, the User-Agent (first request only) and the
        HTTP version, logs the request URI to stderr, then writes the canned
        response for the request path (a plain "default" 200 for paths that
        are not in `responses`).
        """
        # Pipelined responses have to go out in the order the requests came
        # in, so take from the front of the queue.
        request = self.request_queue.pop(0)
        conn_data = ConnectionServer.connections[self.id]
        # Requests still queued behind this one arrived before it was answered.
        conn_data['max_queue'] = max(
            len(self.request_queue), conn_data['max_queue']
        )
        request_lines = CRLF.split(request)
        if conn_data['request_counter'] == 0:
            for line in request_lines:
                if line[:11].lower() == "user-agent:":
                    conn_data['user_agent'] = line[11:].strip()
                    break
        # Request line: "<method> <request-uri> <http-version>".
        _method, _rest = request_lines[0].split(None, 1)
        request_uri, http_version = _rest.rsplit(None, 1)
        conn_data['http_version'] = http_version
        sys.stderr.write("  request-uri: %s\n" % request_uri)
        # Responses are selected by path only; the query string is ignored.
        request_path = request_uri.split("?", 1)[0]
        conn_data['request_counter'] += 1
        status_code, status_phrase, response_body, content_type = \
            self.responses.get(request_path, (200, "OK", "default", 'text/plain'))
        self.sendLine("HTTP/1.1 %s %s" % (status_code, status_phrase))
        self.sendLine("Content-Type: %s" % content_type)
        self.sendLine("Connection: keep-alive")
        self.sendLine("Content-Length: %s" % len(response_body))
        # NOTE(review): the Server value appears to be chosen with Firefox's
        # pipelining check in mind; see the Mozilla source linked below.
        # http://lxr.mozilla.org/mozilla/source/netwerk/protocol/http/src/nsHttpConnection.cpp#247
        self.sendLine("Server: Apache/2.4")
        self.sendLine("")
        self.transport.write(response_body)
        # Restart the clock on every response, so that stop_time - start_time
        # is the time the connection sat idle after its last response.
        conn_data['start_time'] = time.time()

    def connectionMade(self):
        """Count the new connection and start its statistics record."""
        ConnectionServer.conn_count += 1
        ConnectionServer.connections[self.id] = {
            'request_counter': 0,
            'start_time': time.time(),
            'stop_time': 0,
            'max_queue': 0,
            'user_agent': "",
            'http_version': "",
        }

    def connectionLost(self, reason):
        """Record when the connection closed and schedule a shutdown check."""
        ConnectionServer.connections[self.id]['stop_time'] = time.time()
        ConnectionServer.conn_count -= 1
        # Wait a second before checking, so a client that opens a new
        # connection in the meantime keeps the server running.
        reactor.callLater(1, self.checkDone)

    def checkDone(self):
        """Stop the reactor once no connections are open any more."""
        if ConnectionServer.conn_count == 0:
            sys.stderr.write("All connections closed.\n")
            try:
                reactor.stop()
            except RuntimeError:
                # Best effort: presumably an earlier check already stopped
                # the reactor.
                pass


class ConnectionServerFactory(protocol.ServerFactory):
    """Factory handed to reactor.listenTCP(); its protocol is ConnectionServer."""
    protocol = ConnectionServer


if __name__ == "__main__":
    import os
    import sys

    # Test page served for "/": `image_count` <img> elements, each with its
    # own query string (/image?1 ... /image?N).
    image_count = 30
    root_body = (
        "<html>\n<head>\n</head>\n<body>\n"
        + "".join(
            ['    <img src="/image?%i"/>\n' % n
             for n in range(1, image_count + 1)]
        )
        + "</body>\n</html>"
    )

    # Exactly one argument is expected: the TCP port to listen on.
    port = None
    if len(sys.argv) == 2:
        try:
            port = int(sys.argv[1])
        except ValueError:
            pass  # not a number: reported as a usage error below
    if port is None:
        print("Usage: %s port" % sys.argv[0])
        sys.exit(1)
    ConnectionServer.responses['/'] = (200, "OK", root_body, "text/html")
    sys.stderr.write("PID: %s\n" % os.getpid())
    sys.stderr.write("Point your browser at port %i.\n" % port)
    reactor.listenTCP(port, ConnectionServerFactory())
    try:
        reactor.run()
    finally:
        # Print the summary however the reactor came to a stop.
        print("")
        conns = list(ConnectionServer.connections.values())
        if not conns:
            print("No connections were made.")
        else:
            # The user agent is only recorded on connections that received
            # at least one request, so skip records where it is empty.
            agents = [c['user_agent'] for c in conns if c['user_agent']]
            print(agents[0] if agents else "")
        now = time.time()
        i = 0
        for conn in conns:
            i += 1
            pipelining = ""
            if conn['max_queue'] > 0:
                pipelining = "(pipeline held at most %s)" % conn['max_queue']
            # stop_time is still 0 for connections that never closed; measure
            # their idle time up to now instead.
            ended = conn['stop_time'] or now
            print("conn %i - %i %s requests, then idle %i seconds %s" % (
                i,
                conn['request_counter'],
                conn['http_version'],
                (ended - conn['start_time']),
                pipelining
            ))
	#!/usr/bin/env python
	"""
	test_http_conns.py - Tests for how HTTP persistent connections are handled,
	including pipelining.

	This script will set up a server which summarises how a browser connecting
	to it behaves. For example, Firefox with pipelining turned on will give:

	--->8---

	~> ./test_http_conns.py 8001
	PID: 11458
	Point your browser at port 8001.
	request-uri: /
	request-uri: /image?1
	[...]
	request-uri: /image?27
	^C
	Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
	conn 1 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
	conn 2 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
	conn 3 - 6 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
	conn 4 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
	conn 5 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)
	conn 6 - 5 HTTP/1.1 requests, then idle 2 seconds (pipeline held at most 3)

	---8<---

	Requires:
	* Twisted - <http://twistedmatrix.com/>
	"""

	__author__ = "Mark Nottingham <mnot@mnot.net>"
	__copyright__ = """\
	Copyright (c) 2008 Mark Nottingham

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	THE SOFTWARE.
	"""


	import re
	import time
	from urllib import urlopen
	from twisted.internet import protocol
	from twisted.protocols import basic

	# Use the epoll-based reactor when this Twisted installation provides one.
	# install() is called before the `reactor` import below, presumably so that
	# the import picks up the epoll reactor rather than the default one.
	try:
	    from twisted.internet import epollreactor
	    epollreactor.install()
	except ImportError:
	    # No epollreactor module available: carry on with the default reactor.
	    pass

	from twisted.internet import reactor

	# Where to get the image from. ConnectionServer downloads it once, while its
	# class body is executed, and serves the bytes for every /image request.
	image_url = "http://www.mnot.net/photo/2006/06/edinburgh/Thumbs/_DSC0029.jpg"

	# How long to delay before processing requests; useful if you're testing
	# on the same host and the client doesn't get a chance to pipeline.
	# Passed as the delay argument of reactor.callLater().
	delay = 0.1

	# A single line ending, with or without the CR; splits a request into lines.
	CRLF = re.compile(r"\r?\n", re.M)
	# Two consecutive line endings, i.e. the blank line that ends a request.
	MSG_DELIMITER = re.compile(r"\r?\n\r?\n", re.M)

	class ConnectionServer(basic.LineReceiver):
	    """
	    Bare-bones HTTP server protocol that records how each connection is used.

	    Every request is assumed to be a GET without a body and is answered with
	    a canned response from `responses`. One instance exists per connection;
	    the per-connection statistics live in the class-level `connections` dict
	    so that they are still available after the connection has closed.
	    """

	    # A false line_mode makes LineReceiver hand incoming data to
	    # rawDataReceived() instead of splitting it into lines.
	    line_mode = 0
	    # Statistics for every connection seen so far, keyed by instance `id`.
	    connections = {}
	    # Number of connections that are currently open.
	    conn_count = 0
	    # Last connection id handed out by __init__.
	    _last_id = 0
	    # Canned responses: request path -> (status code, status phrase, body,
	    # content type). NOTE: the image is downloaded while this class body is
	    # executed, i.e. as soon as the module is loaded.
	    responses = {
	        '/': (200, "OK", "OK", "text/plain"),
	        '/error/404': (404, "Not Found", "sorry, not found", "text/plain"),
	        '/error/500': (500, "Internal Server Error", "sorry", "text/plain"),
	        '/image': (200, "OK", urlopen(image_url).read(), "image/jpeg"),
	    }

	    def __init__(self, root_body=None, root_type="text/html"):
	        """
	        Set up the per-connection buffers and give the connection an id.

	        root_body -- optional replacement body for "/". It is stored in the
	                     class-level `responses` dict, so it applies to every
	                     connection, not only this one.
	        root_type -- content type sent with root_body.
	        """
	        if root_body:
	            self.responses['/'] = (200, "OK", root_body, root_type)
	        # Bytes received that do not yet form a complete request.
	        self.input = ""
	        # Complete requests waiting to be answered, oldest first.
	        self.request_queue = []
	        # Records in `connections` outlive their connection, whereas id(self)
	        # is only unique among objects that are alive at the same time, so a
	        # later connection could collide with the record of a closed one.
	        # A counter never repeats.
	        ConnectionServer._last_id += 1
	        self.id = ConnectionServer._last_id

	    def rawDataReceived(self, data):
	        """
	        Buffer incoming data and queue every complete request found in it.

	        A request is complete once the blank line ending its headers has
	        arrived. Each complete request is processed by process_request()
	        after `delay` seconds, which gives the client time to pipeline more.
	        """
	        self.input += data
	        pieces = MSG_DELIMITER.split(self.input)
	        # The last piece follows the final delimiter (or is the whole buffer
	        # when no delimiter has arrived yet), so it is not a complete request:
	        # keep it buffered until the rest of it shows up.
	        self.input = pieces.pop()
	        for request in pieces:
	            if not request:
	                continue
	            self.request_queue.append(request)
	            reactor.callLater(delay, self.process_request)

	    def process_request(self):
	        """
	        Answer the oldest queued request and update this connection's stats.

	        Scheduled once per queued request by rawDataReceived(). Records the
	        deepest backlog seen, the User-Agent (first request only) and the
	        HTTP version, logs the request URI to stderr, then writes the canned
	        response for the request path (a plain "default" 200 for paths that
	        are not in `responses`).
	        """
	        # Pipelined responses have to go out in the order the requests came
	        # in, so take from the front of the queue.
	        request = self.request_queue.pop(0)
	        conn_data = ConnectionServer.connections[self.id]
	        # Requests still queued behind this one arrived before it was answered.
	        conn_data['max_queue'] = max(
	            len(self.request_queue), conn_data['max_queue']
	        )
	        request_lines = CRLF.split(request)
	        if conn_data['request_counter'] == 0:
	            for line in request_lines:
	                if line[:11].lower() == "user-agent:":
	                    conn_data['user_agent'] = line[11:].strip()
	                    break
	        # Request line: "<method> <request-uri> <http-version>".
	        _method, _rest = request_lines[0].split(None, 1)
	        request_uri, http_version = _rest.rsplit(None, 1)
	        conn_data['http_version'] = http_version
	        sys.stderr.write(" request-uri: %s\n" % request_uri)
	        # Responses are selected by path only; the query string is ignored.
	        request_path = request_uri.split("?", 1)[0]
	        conn_data['request_counter'] += 1
	        status_code, status_phrase, response_body, content_type = \
	            self.responses.get(request_path, (200, "OK", "default", 'text/plain'))
	        self.sendLine("HTTP/1.1 %s %s" % (status_code, status_phrase))
	        self.sendLine("Content-Type: %s" % content_type)
	        self.sendLine("Connection: keep-alive")
	        self.sendLine("Content-Length: %s" % len(response_body))
	        # NOTE(review): the Server value appears to be chosen with Firefox's
	        # pipelining check in mind; see the Mozilla source linked below.
	        # http://lxr.mozilla.org/mozilla/source/netwerk/protocol/http/src/nsHttpConnection.cpp#247
	        self.sendLine("Server: Apache/2.4")
	        self.sendLine("")
	        self.transport.write(response_body)
	        # Restart the clock on every response, so that stop_time - start_time
	        # is the time the connection sat idle after its last response.
	        conn_data['start_time'] = time.time()

	    def connectionMade(self):
	        """Count the new connection and start its statistics record."""
	        ConnectionServer.conn_count += 1
	        ConnectionServer.connections[self.id] = {
	            'request_counter': 0,
	            'start_time': time.time(),
	            'stop_time': 0,
	            'max_queue': 0,
	            'user_agent': "",
	            'http_version': "",
	        }

	    def connectionLost(self, reason):
	        """Record when the connection closed and schedule a shutdown check."""
	        ConnectionServer.connections[self.id]['stop_time'] = time.time()
	        ConnectionServer.conn_count -= 1
	        # Wait a second before checking, so a client that opens a new
	        # connection in the meantime keeps the server running.
	        reactor.callLater(1, self.checkDone)

	    def checkDone(self):
	        """Stop the reactor once no connections are open any more."""
	        if ConnectionServer.conn_count == 0:
	            sys.stderr.write("All connections closed.\n")
	            try:
	                reactor.stop()
	            except RuntimeError:
	                # Best effort: presumably an earlier check already stopped
	                # the reactor.
	                pass


	class ConnectionServerFactory(protocol.ServerFactory):
	    """Factory handed to reactor.listenTCP(); its protocol is ConnectionServer."""
	    protocol = ConnectionServer


	if __name__ == "__main__":
	    import os
	    import sys

	    # Test page served for "/": `image_count` <img> elements, each with its
	    # own query string (/image?1 ... /image?N). The leading tab on every
	    # line of the page is kept exactly as it appears in this copy.
	    image_count = 30
	    root_body = (
	        "\t<html>\n\t<head>\n\t</head>\n\t<body>\n"
	        + "".join(
	            ['\t<img src="/image?%i"/>\n' % n
	             for n in range(1, image_count + 1)]
	        )
	        + "\t</body>\n\t</html>"
	    )

	    # Exactly one argument is expected: the TCP port to listen on.
	    port = None
	    if len(sys.argv) == 2:
	        try:
	            port = int(sys.argv[1])
	        except ValueError:
	            pass  # not a number: reported as a usage error below
	    if port is None:
	        print("Usage: %s port" % sys.argv[0])
	        sys.exit(1)
	    ConnectionServer.responses['/'] = (200, "OK", root_body, "text/html")
	    sys.stderr.write("PID: %s\n" % os.getpid())
	    sys.stderr.write("Point your browser at port %i.\n" % port)
	    reactor.listenTCP(port, ConnectionServerFactory())
	    try:
	        reactor.run()
	    finally:
	        # Print the summary however the reactor came to a stop.
	        print("")
	        conns = list(ConnectionServer.connections.values())
	        if not conns:
	            print("No connections were made.")
	        else:
	            # The user agent is only recorded on connections that received
	            # at least one request, so skip records where it is empty.
	            agents = [c['user_agent'] for c in conns if c['user_agent']]
	            print(agents[0] if agents else "")
	        now = time.time()
	        i = 0
	        for conn in conns:
	            i += 1
	            pipelining = ""
	            if conn['max_queue'] > 0:
	                pipelining = "(pipeline held at most %s)" % conn['max_queue']
	            # stop_time is still 0 for connections that never closed; measure
	            # their idle time up to now instead.
	            ended = conn['stop_time'] or now
	            print("conn %i - %i %s requests, then idle %i seconds %s" % (
	                i,
	                conn['request_counter'],
	                conn['http_version'],
	                (ended - conn['start_time']),
	                pipelining
	            ))