Skip to content

Instantly share code, notes, and snippets.

@timka
Forked from anonymous/gist:06e0bd519490c8f03404
Last active September 17, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timka/1131123ea18ef3c676f7 to your computer and use it in GitHub Desktop.
Save timka/1131123ea18ef3c676f7 to your computer and use it in GitHub Desktop.
Программа не учитывает ситуации, когда внутри слова есть тэг (Log<b>Jam</b>), ибо неохота дальше усложнять. PEP8 — федеральный закон, Python Zen — конституция, где readability does count
# coding: utf-8
# The program does not handle the case where a tag occurs inside a word
# (Log<b>Jam</b>), because I did not want to complicate it any further.
# PEP8 is federal law, the Zen of Python is the constitution, where readability does count
import argparse
import webbrowser
import multiprocessing
import re
import sys
import SocketServer
import SimpleHTTPServer
from StringIO import StringIO
from HTMLParser import HTMLParser
from xml.sax.saxutils import escape, quoteattr
import requests
# Python 2 only: site.py deletes sys.setdefaultencoding at startup;
# reloading the sys module makes it available again.
sys = reload(sys)
class Proxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """HTTP request handler that relays GET requests to an upstream site.

    HTML responses are passed through HTMLMangler before being returned;
    every other content type is relayed unchanged.
    """

    # Base URL of the upstream site; the launcher assigns it before serving.
    site = None

    # Seconds to wait for the upstream server.  Without a timeout
    # requests never raises Timeout, so the 504 branch could not trigger.
    timeout = 30

    # Upstream headers that describe the upstream transfer rather than the
    # body this handler sends (requests has already decoded the content, and
    # the HTML body length changes), so they must not be forwarded verbatim.
    skip_headers = frozenset([
        'content-encoding',
        'transfer-encoding',
        'connection',
        'content-length',
    ])

    def do_GET(self):
        """Fetch ``site + path`` upstream and send the result to the client.

        Replies 504 on an upstream timeout, 502 on a connection failure and
        500 (re-raising the error) when the HTML rewrite fails.
        """
        try:
            response = requests.get(self.site + self.path,
                                    timeout=self.timeout)
        except requests.exceptions.Timeout:
            self.send_error(504)
            return
        except requests.exceptions.ConnectionError:
            self.send_error(502)
            return

        # A response without Content-Type is treated as non-HTML.
        if 'html' not in response.headers.get('Content-Type', ''):
            # response.content is the already-downloaded body.
            self._send_body(response, response.content)
            return

        # requests may report no encoding at all; fall back to UTF-8.
        encoding = response.encoding or 'utf-8'
        # Kept from the original design: implicit str/unicode conversions
        # performed while rewriting then use the page's encoding.
        sys.setdefaultencoding(encoding)
        out = StringIO()
        mangler = HTMLMangler(out)
        try:
            mangler.feed(response.text)
            mangler.close()
        except Exception:
            self.send_error(500)
            raise

        body = out.getvalue()
        if not isinstance(body, bytes):
            # Characters the page charset cannot represent (the trademark
            # sign in legacy 8-bit encodings) become numeric references
            # instead of raising UnicodeEncodeError.
            body = body.encode(encoding, 'xmlcharrefreplace')
        self._send_body(response, body)

    def _send_body(self, response, body):
        """Send the upstream status, filtered headers and ``body`` bytes."""
        self.send_response(response.status_code)
        for name, value in response.headers.items():
            # Header names keep the upstream's capitalisation.
            if name.lower() in self.skip_headers:
                continue
            self.send_header(name, value)
        self.send_header('Content-Length', str(len(body)))
        # The header section must be terminated before any body byte.
        self.end_headers()
        self.wfile.write(body)
class HTMLMangler(HTMLParser):
    """Re-serialize parsed HTML, adding a trademark sign to 6-character words.

    Every parser event is written back to ``out`` as markup.  Text nodes get
    U+2122 appended to each word of exactly six word characters, except text
    directly inside the elements listed in ``skip_tags``.

    A tag in the middle of a word (``Log<b>Jam</b>``) splits it into separate
    text nodes, so such words are not recognised.
    """

    # Elements whose text is code rather than prose; passed through verbatim.
    skip_tags = 'script style'.split()
    pattern = re.compile(r'\b(\w{6})\b', re.UNICODE | re.IGNORECASE)
    # Unicode literal built from escapes, so the substitution does not depend
    # on the source-file encoding or the interpreter's default encoding.
    replacement = u'\\1\u2122'

    def __init__(self, out):
        """Create a mangler that writes its output to ``out``.

        ``out`` is any object with a ``write`` method accepting text.
        """
        HTMLParser.__init__(self)
        self.out = out
        # Name of the most recently opened tag, or None after any end tag.
        self.in_tag = None

    def handle_startendtag(self, tag, attrs):
        """Write a self-closing tag (``<br/>``)."""
        self.out.write('<%s' % tag)
        self.handle_attrs(attrs)
        self.out.write('/>')

    def handle_starttag(self, tag, attrs):
        """Write an opening tag and remember it as the current element."""
        self.out.write('<%s' % tag)
        self.handle_attrs(attrs)
        self.out.write('>')
        self.in_tag = tag

    def handle_attrs(self, attrs):
        """Write ``attrs``, a list of ``(name, value)`` pairs.

        A value of None denotes a bare attribute and is written without
        ``=``; other values are quoted and escaped with ``quoteattr``.
        """
        for attr, value in attrs:
            if value is not None:
                self.out.write(' %s=%s' % (attr, quoteattr(value)))
            else:
                self.out.write(' %s' % attr)

    def handle_data(self, data):
        """Write a text node, mangled unless it is inside a skipped tag."""
        if self.in_tag in self.skip_tags:
            # Script and style bodies are not entity-encoded; escaping them
            # would corrupt the code (``a < b`` turning into ``a &lt; b``).
            self.out.write(data)
            return
        self.out.write(escape(self.pattern.sub(self.replacement, data)))

    def handle_endtag(self, tag):
        """Write a closing tag and forget the current element."""
        self.out.write('</%s>' % tag)
        self.in_tag = None

    def handle_charref(self, name):
        """Write a numeric character reference unchanged."""
        self.out.write('&#%s;' % name)

    def handle_entityref(self, name):
        """Write a named entity reference unchanged."""
        self.out.write('&%s;' % name)

    def handle_comment(self, data):
        """Write a comment verbatim.

        No padding is added around ``data`` so that constructs whose syntax
        starts right after ``<!--`` (conditional comments) survive.
        """
        self.out.write('<!--%s-->' % data)

    def handle_decl(self, decl):
        """Write a declaration such as a doctype."""
        self.out.write('<!%s>' % decl)

    # NOTE(review): unknown declarations reuse the ``<!...>`` wrapper; this
    # probably does not reproduce marked sections such as ``<![CDATA[...]]>``
    # exactly -- confirm before relying on it.
    unknown_decl = handle_decl

    def handle_pi(self, data):
        """Write a processing instruction."""
        self.out.write('<?%s>' % data)
if __name__ == '__main__':
    # Launcher: start the proxy in a child process, open it in the browser
    # and keep serving until the user presses Enter.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", help="bind host", default='localhost')
    parser.add_argument("--port", help="bind port", type=int, default=1234)
    parser.add_argument("--site", help="site url", default='http://habrahabr.ru')
    args = parser.parse_args()

    # Must be set before the server process starts so it inherits the value.
    Proxy.site = args.site
    httpd = SocketServer.ForkingTCPServer((args.host, args.port), Proxy)
    p = multiprocessing.Process(target=httpd.serve_forever, args=())
    p.start()
    try:
        # 0.0.0.0 is a bind-only wildcard; browse via localhost instead.
        host = 'localhost' if args.host == '0.0.0.0' else args.host
        webbrowser.open_new_tab('http://' + host + ':' + str(args.port))
        raw_input()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C is treated like Enter: shut down.
        pass
    finally:
        # The server process is non-daemonic, so the interpreter would wait
        # for it forever at exit unless it is stopped explicitly.
        p.terminate()
        p.join()
        httpd.server_close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment