Skip to content

Instantly share code, notes, and snippets.

@LevBravE
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LevBravE/fe3a840560379a3d2397 to your computer and use it in GitHub Desktop.
Save LevBravE/fe3a840560379a3d2397 to your computer and use it in GitHub Desktop.
Proxy Server™
# -*- coding: utf-8 -*-
"""
Install twisted (pip install twisted) and run the script.
A small proxy server that intercepts the HTML content of a page and modifies
it by appending ™ after every word that consists of exactly six letters.
Tested with Firefox.
In the Firefox settings, set:
HTTP Proxy -> localhost
Port -> 8080
Then run the script and open the page
-> http://habrahabr.ru/company/yandex/blog/258673/
Works with plain http only.
P.S. You can also try browsing other sites ;)
"""
from twisted.internet import reactor
from twisted.web import proxy, http
from HTMLParser import HTMLParser
import re
# Original "<start-tag text><text node>" fragments recorded by
# MyHTMLParser.handle_data while a response body is being parsed.
LST_SUB = []
# Fragments parallel to LST_SUB (same index) in which six-letter words already
# carry the trademark sign; LoggingProxyClient.handleResponseEnd replaces each
# LST_SUB entry found in the page with its counterpart from this list.
LST_SUB_MODIFY = []
class MyHTMLParser(HTMLParser):
    """Record text fragments of a page and their trademark-decorated versions.

    For every text node outside the skipped tags two parallel entries are
    appended to the module-level lists: ``LST_SUB`` gets the original fragment
    (prefix + text) and ``LST_SUB_MODIFY`` gets the same fragment with a
    trademark sign appended to every six-letter word.  The prefix is the raw
    text of the most recent start tag, or an empty string when that start tag
    was already used as the prefix of the previous fragment.
    """

    # Tags whose text must never be altered.  ``style`` is listed because CSS
    # consists largely of six-letter keywords (margin, border, height, ...)
    # that would otherwise be corrupted.
    SKIPPED_TAGS = ('script', 'style', 'link')

    # Compiled once instead of on every text node: a standalone run of exactly
    # six Latin or Cyrillic letters.
    SIX_LETTER_WORD = re.compile(u'(\\b[a-zA-Zа-яА-Я]{6}\\b)', re.U)

    # The template has to be unicode: under Python 2 a byte-string template
    # holding the non-ASCII trademark sign cannot be joined with the unicode
    # text being substituted (implicit ASCII decoding fails).
    REPLACEMENT = u'\\1\u2122'

    def __init__(self):
        HTMLParser.__init__(self)
        # Name of the most recently opened tag.
        self.start_tag = ''
        # Prefix stored in front of the text of the current fragment.
        self.str_start_tag = ''
        # True when the next fragment must take a fresh start-tag prefix.
        self.is_flag = True

    def handle_starttag(self, tag, attrs):
        """Remember the name of the tag that was just opened."""
        self.start_tag = tag

    def handle_data(self, data):
        """Record *data* and its modified form unless it sits in a skipped tag."""
        tmp_data = data
        if self.start_tag not in self.SKIPPED_TAGS:
            # get_starttag_text() is None until the first start tag has been
            # parsed; fall back to '' so no literal "None" ends up in the
            # recorded fragment.
            current_start_tag = self.get_starttag_text() or ''
            if not self.is_flag:
                if self.str_start_tag == current_start_tag:
                    # Same start tag as for the previous fragment: this text
                    # does not directly follow it, so record it bare.
                    self.str_start_tag = ''
                else:
                    self.is_flag = True
            if self.is_flag:
                self.str_start_tag = current_start_tag
                self.is_flag = False
            LST_SUB.append('%s%s' % (self.str_start_tag, tmp_data))
            tmp_data = self.SIX_LETTER_WORD.sub(self.REPLACEMENT, tmp_data)
            LST_SUB_MODIFY.append('%s%s' % (self.str_start_tag, tmp_data))
        HTMLParser.handle_data(self, tmp_data)
class LoggingProxyClient(proxy.ProxyClient):
    """Proxy client that buffers the whole response and rewrites HTML bodies.

    The response is collected in ``self.buffer``; once it is complete, bodies
    announced as ``text/html`` are run through ``MyHTMLParser`` and the
    recorded fragments are replaced by their modified counterparts before the
    body is written back to the requesting client.
    """

    def __init__(self, command, rest, version, headers, data, father):
        # Drop Accept-Encoding from the forwarded request headers.  pop() is
        # used instead of del so that a client which sends no such header does
        # not trigger a KeyError.
        headers.pop("accept-encoding", None)
        proxy.ProxyClient.__init__(
            self, command, rest, version, headers, data, father
        )
        self.is_html = False
        self.buffer = ""

    def handleHeader(self, key, value):
        """Flag HTML responses, then let the base class handle the header."""
        # Compared case-insensitively so that e.g. "Text/HTML" is recognised.
        if (key.lower() == "content-type"
                and value.lower().startswith("text/html")):
            self.is_html = True
        proxy.ProxyClient.handleHeader(self, key, value)

    def handleResponsePart(self, buffer):
        """Accumulate a chunk of the body instead of forwarding it at once."""
        self.buffer += buffer

    def handleResponseEnd(self):
        """Rewrite the buffered body if it is HTML and send it to the client."""
        if not self._finished:
            if self.is_html:
                print("HTML -> PHTML")
                self.buffer = self._rewrite_html(self.buffer)
            # The body length may have changed, so the header is set from the
            # final buffer.
            self.father.responseHeaders.setRawHeaders(
                "Content-Length", [str(len(self.buffer))]
            )
            self.father.write(self.buffer)
        proxy.ProxyClient.handleResponseEnd(self)

    def _rewrite_html(self, raw):
        """Return *raw* with each recorded fragment swapped for its modified form.

        *raw* is returned unchanged when it cannot be processed as UTF-8 text.
        """
        # Start from empty lists in case an earlier rewrite was interrupted.
        del LST_SUB[:]
        del LST_SUB_MODIFY[:]
        try:
            text = raw.decode('utf-8')
            parser = MyHTMLParser()
            parser.feed(text)
            parser.close()
            seen = set()
            for original, modified in zip(LST_SUB, LST_SUB_MODIFY):
                # str.replace already rewrites every occurrence; handling a
                # repeated fragment again would append a second trademark
                # sign whenever the original is a prefix of its modified form.
                if original in seen:
                    continue
                seen.add(original)
                if original != modified and original in text:
                    text = text.replace(original, modified)
            return text.encode('utf-8')
        except UnicodeDecodeError:
            # Deliberate best effort: pages that are not valid UTF-8 are
            # passed through untouched.
            return raw
        finally:
            # Always leave the shared lists empty, also after a failure, so
            # that fragments never leak into the next response.
            del LST_SUB[:]
            del LST_SUB_MODIFY[:]
class LoggingProxyClientFactory(proxy.ProxyClientFactory):
    # Client factory whose protocol class is the buffering, HTML-rewriting
    # LoggingProxyClient.
    protocol = LoggingProxyClient
class LoggingProxyRequest(proxy.ProxyRequest):
    """Proxy request that logs every URI and serves plain http only."""

    # Scheme -> client factory used to fetch the resource.
    protocols = {"http": LoggingProxyClientFactory}

    def process(self):
        """Normalise the request URI, log it and hand it to the base class.

        A request with a relative URI and no Host header is answered with
        400; a request whose scheme the base class rejects with a KeyError is
        answered with 501, so the client is not left waiting for a response.
        """
        if not self.uri.startswith(("http://", "https://")):
            host = self.getHeader("Host")
            if host is None:
                # Without a Host header no absolute URI can be built.
                self.setResponseCode(400)
                self.finish()
                return
            self.uri = "http://" + host + self.uri
        print("Request from %s for %s" % (self.getClientIP(), self.uri))
        try:
            proxy.ProxyRequest.process(self)
        except KeyError:
            print("HTTPS is not supported at the moment!")
            # Answer explicitly instead of leaving the request unanswered.
            self.setResponseCode(501)
            self.finish()
class LoggingProxy(proxy.Proxy):
    # Every request received on this channel is represented by a
    # LoggingProxyRequest.
    requestFactory = LoggingProxyRequest
class LoggingProxyFactory(http.HTTPFactory):
    """Server factory that gives each incoming connection a LoggingProxy."""

    def buildProtocol(self, addr):
        """Return a new LoggingProxy; *addr* is not used."""
        channel = LoggingProxy()
        return channel
if __name__ == '__main__':
    # Listen on the port the module docstring tells the browser to use, then
    # hand control to the Twisted event loop.
    listen_port = 8080
    proxy_factory = LoggingProxyFactory()
    reactor.listenTCP(listen_port, proxy_factory)
    reactor.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment