Skip to content

Instantly share code, notes, and snippets.

@shurup14
Last active November 20, 2016 11:51
Show Gist options
  • Save shurup14/a7cdc3c1dd6dc9ad4fbbd59df1fcd996 to your computer and use it in GitHub Desktop.
Save shurup14/a7cdc3c1dd6dc9ad4fbbd59df1fcd996 to your computer and use it in GitHub Desktop.
Tornado proxy handler. Requires 'pip install tornado bs4 validators html5lib'
# coding: utf-8
import re
import tornado.ioloop
import tornado.web
import tornado.httpclient
import tornado.options
import argparse
import signal
import HTMLParser
import webbrowser
from validators.url import url
from urlparse import urlparse
from bs4 import BeautifulSoup, Comment
# Shared asynchronous HTTP client used by ProxyHandler to fetch upstream pages.
client = tornado.httpclient.AsyncHTTPClient()

# Trade-mark sign appended after every six-letter word.  It is spelled as an
# explicit unicode code point: passing the literal character through
# HTMLParser.unescape() is a no-op (there is no entity to decode) and, on
# Python 2, leaves a UTF-8 byte string that cannot be mixed with the unicode
# pattern and text handed to re.sub().
tm = u'\u2122'

# Set to True by signal_handler(); polled by try_exit() to stop the IOLoop.
is_closing = False
def p_url(target=None):
    """Split a URL into the pieces the proxy needs.

    Args:
        target: URL to split.  When omitted (the way every existing caller
            invokes this function) the value of the ``--url`` command-line
            option, ``args.url``, is used.

    Returns:
        A 3-tuple ``(origin, path, netloc)`` where ``origin`` is
        ``scheme + '://' + netloc``.
    """
    if target is None:
        # ``args`` is the module-level namespace produced by argparse in the
        # ``__main__`` section; it only exists when the file runs as a script.
        target = args.url
    parts = urlparse(target)
    origin = parts.scheme + '://' + parts.netloc
    return origin, parts.path, parts.netloc
def url_validate(u):
    """argparse ``type=`` callback: accept ``u`` only if it is a valid URL.

    Returns ``u`` unchanged when ``validators.url.url`` reports it as valid;
    otherwise raises ``argparse.ArgumentTypeError`` so argparse can report
    the bad value to the user.
    """
    if not url(u):
        raise argparse.ArgumentTypeError("%s is not correct url format" % u)
    return u
def signal_handler(signum, frame):
    """Signal callback: request shutdown by raising the ``is_closing`` flag.

    Both arguments are supplied by the ``signal`` module and are ignored.
    The handler only flips the module-level flag; try_exit() is the function
    that reacts to it.
    """
    global is_closing
    is_closing = True
def try_exit():
    """Stop the Tornado IOLoop once a shutdown has been requested.

    Does nothing while the module-level ``is_closing`` flag is false.
    """
    if not is_closing:
        return
    tornado.ioloop.IOLoop.instance().stop()
def visible(el):
    """Tell whether a node's parent is something other than a non-content tag.

    Returns False when the parent's name is ``style``, ``script``,
    ``[document]`` or ``head``; True otherwise.
    """
    return el.parent.name not in ('style', 'script', '[document]', 'head')
class ProxyHandler(tornado.web.RequestHandler):
    """Relay GET requests to the target site and decorate HTML responses.

    For ``text/html`` responses every link that points at the target site is
    redirected to this proxy, and the trade-mark sign ``tm`` is appended
    after each six-letter word in text accepted by visible().  Any other
    content is passed through unchanged.
    """

    # Whole words of exactly six word characters.  re.U makes \w and \b
    # follow Unicode rules so non-ASCII words are matched as well.
    _SIX_LETTER_WORD = re.compile(r'(\b\w{6}\b)', re.U)

    def callback(self, response):
        """Handle the upstream response handed over by ``client.fetch``.

        Writes the upstream error text, the rewritten HTML, or the raw body,
        and always finishes the request, even if the rewriting raises.
        """
        try:
            if response.error:
                self.write(str(response.error))
                return

            # .get() instead of indexing: a response without a Content-Type
            # header must be passed through, not fail with KeyError.
            content_type = response.headers.get('Content-Type', '')
            if 'text/html' in content_type:
                self.write(self._rewrite_html(response.body))
                return

            # Forward the upstream type so stylesheets, images, etc. are not
            # served under Tornado's default text/html content type.
            if content_type:
                self.set_header('Content-Type', content_type)
            self.write(response.body)
        finally:
            self.finish()

    def _rewrite_html(self, body):
        """Return ``body`` with target-site links and six-letter words rewritten."""
        soup = BeautifulSoup(body, "html5lib")

        # Both values are loop-invariant, so compute them once.  ``https?``
        # covers plain-http links too, and re.escape() stops the dots of the
        # host name from acting as regex wildcards.
        target_prefix = re.compile(r'^https?://%s' % re.escape(p_url()[2]))
        proxy_origin = 'http://' + args.host + ':' + str(args.port)
        for a in soup.find_all('a', href=True):
            a['href'] = target_prefix.sub(proxy_origin, a['href'])

        marked = r'\1%s' % tm
        text_nodes = soup.find_all(
            text=lambda text: not isinstance(text, Comment))
        # Filter into a list first: the tree is modified while we walk it.
        for item in [node for node in text_nodes if visible(node)]:
            item.replace_with(self._SIX_LETTER_WORD.sub(marked, item))
        return str(soup)

    @tornado.web.asynchronous
    def get(self):
        """Start an asynchronous fetch of the same URI on the target site."""
        req = tornado.httpclient.HTTPRequest(p_url()[0] + self.request.uri)
        client.fetch(req, self.callback)
def run_proxy():
    """Start the proxy server and block until shutdown is requested.

    Installs signal_handler for SIGINT, serves every path with ProxyHandler
    on ``args.host``/``args.port``, opens the proxied start page in the
    default web browser, and runs the IOLoop with try_exit() registered as a
    periodic callback (interval argument 100).
    """
    tornado.options.parse_command_line()
    signal.signal(signal.SIGINT, signal_handler)

    routes = [(r".*", ProxyHandler)]
    server = tornado.web.Application(routes)
    server.listen(args.port, address=args.host)

    start_page = ''.join(
        ['http://', args.host, ':', str(args.port), p_url()[1]])
    webbrowser.open(start_page)

    shutdown_poll = tornado.ioloop.PeriodicCallback(try_exit, 100)
    shutdown_poll.start()
    tornado.ioloop.IOLoop.instance().start()
if __name__ == "__main__":
    # Command-line entry point.  ``args`` is deliberately left at module
    # scope: p_url(), ProxyHandler and run_proxy() all read it as a global.
    parser = argparse.ArgumentParser(description='Proxy help')
    parser.add_argument('--port', type=int, default=8000, help='port number')
    parser.add_argument('--host', type=str, default='127.0.0.1',
                        help='host address')
    parser.add_argument('--url', type=url_validate,
                        default='http://habrahabr.ru', help='url')
    args = parser.parse_args()
    run_proxy()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment