kylef/html.py

## html.py
"""
The idea of this was to strip responses of any unnecessary
characters to save bandwidth. After some testing, the savings
were only up to 8%, and only on rare occasions.
"""

import re

# Matches the closing </html> tag that marks a body as an HTML document.
# The previous pattern, '(*)</html>(*)', could not be compiled at all
# ('*' had nothing to repeat), which made the module fail on import.
re_is_html = re.compile(r'</html>')

class HTMLOptimizerMiddleware(object):
    """
    Response middleware that shrinks HTML documents by collapsing
    whitespace.

    Every run of spaces, tabs, carriage returns (CR) and line feeds (LF)
    is replaced by a single space, so words that were separated only by a
    line break or by several spaces stay separated. The contents of
    <pre>, <textarea>, <script> and <style> elements are copied through
    untouched because whitespace is significant inside them. Whitespace
    inside attribute values is collapsed like any other whitespace.

    A response is rewritten only when it is not flagged as streaming, has
    status code 200, carries at least ``MIN_LENGTH`` bytes/characters of
    content and contains a closing </html> tag. Anything else is returned
    unchanged.

    This middleware should go before any CacheMiddleware or GZipMiddleware
    """

    # Responses shorter than this are not worth rewriting.
    MIN_LENGTH = 1000

    # One (html marker, squeeze pattern, replacement) triple per content
    # type, because a compiled pattern only works on the string type it was
    # built from. The squeeze pattern matches either a whole
    # whitespace-sensitive element (captured in group 1) or a whitespace run.
    _BYTES_RULES = (
        re.compile(br'</html\s*>', re.I),
        re.compile(
            br'(<(pre|textarea|script|style)\b.*?</\2\s*>)|[ \t\r\n]+',
            re.I | re.S),
        b' ',
    )
    _TEXT_RULES = (
        re.compile(u'</html\\s*>', re.I),
        re.compile(
            u'(<(pre|textarea|script|style)\\b.*?</\\2\\s*>)|[ \\t\\r\\n]+',
            re.I | re.S),
        u' ',
    )

    def process_response(self, request, response):
        """
        Collapse whitespace in ``response.content`` and return ``response``.

        ``request`` is accepted for the middleware signature and is not
        used. The same ``response`` object is always returned, modified in
        place when it qualifies for rewriting.
        """
        # Responses flagged as streaming are passed through untouched;
        # presumably they expose no ``content`` to rewrite.
        if getattr(response, 'streaming', False):
            return response

        # It's not worth compressing non-OK or really short responses.
        if response.status_code != 200:
            return response
        content = response.content
        if len(content) < self.MIN_LENGTH:
            return response

        if isinstance(content, bytes):
            is_html, squeeze, space = self._BYTES_RULES
        else:
            is_html, squeeze, space = self._TEXT_RULES

        # Check if it is html.
        if not is_html.search(content):
            return response

        # Group 1 is set only for a protected element, which is kept
        # verbatim; every other match is a whitespace run.
        response.content = squeeze.sub(
            lambda match: match.group(1) or space, content)

        # Keep an already-set Content-Length header in step with the new
        # body. The length is re-read from the response in case the
        # response object re-encodes what was assigned.
        has_header = getattr(response, 'has_header', None)
        if has_header is not None and has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))

        return response
	"""
	The idea of this was to strip responses of any unnecessary
	characters to save bandwidth. After some testing, the savings
	were only up to 8%, and only on rare occasions.
	"""

	import re

	# Matches the closing </html> tag that marks a body as an HTML document.
	# The empty capture groups of the previous pattern captured nothing and
	# have been dropped.
	re_is_html = re.compile(r'</html>')

	class HTMLOptimizerMiddleware(object):
	    """
	    Response middleware that shrinks HTML documents by collapsing
	    whitespace.

	    Every run of spaces, tabs, carriage returns (CR) and line feeds (LF)
	    is replaced by a single space, so words that were separated only by a
	    line break or by several spaces stay separated. The contents of
	    <pre>, <textarea>, <script> and <style> elements are copied through
	    untouched because whitespace is significant inside them. Whitespace
	    inside attribute values is collapsed like any other whitespace.

	    A response is rewritten only when it is not flagged as streaming, has
	    status code 200, carries at least ``MIN_LENGTH`` bytes/characters of
	    content and contains a closing </html> tag. Anything else is returned
	    unchanged.

	    This middleware should go before any CacheMiddleware or GZipMiddleware
	    """

	    # Responses shorter than this are not worth rewriting.
	    MIN_LENGTH = 1000

	    # One (html marker, squeeze pattern, replacement) triple per content
	    # type, because a compiled pattern only works on the string type it was
	    # built from. The squeeze pattern matches either a whole
	    # whitespace-sensitive element (captured in group 1) or a whitespace run.
	    _BYTES_RULES = (
	        re.compile(br'</html\s*>', re.I),
	        re.compile(
	            br'(<(pre|textarea|script|style)\b.*?</\2\s*>)|[ \t\r\n]+',
	            re.I | re.S),
	        b' ',
	    )
	    _TEXT_RULES = (
	        re.compile(u'</html\\s*>', re.I),
	        re.compile(
	            u'(<(pre|textarea|script|style)\\b.*?</\\2\\s*>)|[ \\t\\r\\n]+',
	            re.I | re.S),
	        u' ',
	    )

	    def process_response(self, request, response):
	        """
	        Collapse whitespace in ``response.content`` and return ``response``.

	        ``request`` is accepted for the middleware signature and is not
	        used. The same ``response`` object is always returned, modified in
	        place when it qualifies for rewriting.
	        """
	        # Responses flagged as streaming are passed through untouched;
	        # presumably they expose no ``content`` to rewrite.
	        if getattr(response, 'streaming', False):
	            return response

	        # It's not worth compressing non-OK or really short responses.
	        if response.status_code != 200:
	            return response
	        content = response.content
	        if len(content) < self.MIN_LENGTH:
	            return response

	        if isinstance(content, bytes):
	            is_html, squeeze, space = self._BYTES_RULES
	        else:
	            is_html, squeeze, space = self._TEXT_RULES

	        # Check if it is html.
	        if not is_html.search(content):
	            return response

	        # Group 1 is set only for a protected element, which is kept
	        # verbatim; every other match is a whitespace run.
	        response.content = squeeze.sub(
	            lambda match: match.group(1) or space, content)

	        # Keep an already-set Content-Length header in step with the new
	        # body. The length is re-read from the response in case the
	        # response object re-encodes what was assigned.
	        has_header = getattr(response, 'has_header', None)
	        if has_header is not None and has_header('Content-Length'):
	            response['Content-Length'] = str(len(response.content))

	        return response