Skip to content

Instantly share code, notes, and snippets.

@saintbyte
Created June 11, 2016 18:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saintbyte/8774d01a28cc3e04bbec9544266ecdbd to your computer and use it in GitHub Desktop.
Save saintbyte/8774d01a28cc3e04bbec9544266ecdbd to your computer and use it in GitHub Desktop.
habraproxy.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import urllib
import SimpleHTTPServer
import SocketServer
import sys
import re
import optparse
from bs4 import BeautifulSoup
# When true, debug() prints its argument; when false, debug() stays silent.
DEBUG = True
# TCP port that main() binds the proxy server to.
PORT = 1111
class Proxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
    """HTTP proxy handler that appends a trademark sign to six-character words.

    Each GET request is re-issued to the absolute URL found in ``self.path``
    using ``urllib.urlopen``.  The upstream status code and headers are
    relayed to the client; bodies whose media type is listed in
    ``fix_content_types`` are passed through ``fix_content`` first.
    """

    # Media types whose bodies are rewritten by fix_content().
    fix_content_types = ['text/html']
    # Tags whose text content is never rewritten.
    ignore_tags = ['script', 'style', 'img']
    # Upstream headers that are not relayed: the body is read completely and
    # may be rewritten, so the framing headers are regenerated locally.
    drop_headers = ['content-length', 'transfer-encoding']
    # Runs of exactly six non-space characters delimited by word boundaries.
    # The flags are OR-ed; the former "UNICODE & MULTILINE" evaluated to 0,
    # which silently disabled Unicode-aware word boundaries.
    word_re = re.compile(r"(\b\S{6}\b)", flags=re.UNICODE | re.MULTILINE)

    def do_GET(self):
        """Fetch ``self.path`` upstream and relay the (possibly rewritten) reply.

        Replies with 400 when the request target is not an absolute
        http(s) URL and with 502 when the upstream request fails.
        """
        print("self.path: {}".format(self.path))
        # urllib.urlopen() opens local files for scheme-less or file: URLs,
        # so anything that is not an absolute HTTP(S) URL is rejected.
        if not self.path.lower().startswith((b'http://', b'https://')):
            self.send_error(400, "Absolute http(s) URL required")
            return
        try:
            status, headers, need_fix, remote_data = self._fetch_upstream()
        except Exception as err:
            # Request-handler boundary: network failures do happen, but the
            # client must still receive an answer instead of a dead socket.
            debug("upstream request failed: {!r}".format(err))
            self.send_error(502, "Upstream request failed")
            return
        if need_fix:
            remote_data = self.fix_content(remote_data)
        # Mirror the upstream status and headers.
        self.send_response(status)
        for name, value in headers:
            self.send_header(name, value)
        # The length is recomputed because the body may have been rewritten.
        self.send_header('Content-Length', str(len(remote_data)))
        self.end_headers()
        self.wfile.write(remote_data)

    def _fetch_upstream(self):
        """Request ``self.path``; return ``(status, headers, need_fix, body)``.

        ``headers`` is a list of ``(name, value)`` pairs to relay and
        ``need_fix`` tells whether the media type is in ``fix_content_types``.
        """
        response = urllib.urlopen(self.path)  # TODO: forward the request method
        try:
            need_fix = False
            relayed = []
            for raw_header in response.info().headers:
                # Folded continuation lines cannot be relayed on their own.
                if raw_header[:1] in (b' ', b'\t'):
                    continue
                # Split on the first colon only: values may contain colons.
                name, sep, value = raw_header.partition(b':')
                if not sep:
                    continue
                name = name.strip()
                value = value.strip()
                debug("{}={}".format(name, value))
                lowered = name.lower()  # header names are case-insensitive
                if lowered in self.drop_headers:
                    continue
                relayed.append((name, value))
                if lowered == 'content-type':
                    # Drop parameters such as "; charset=..." before matching.
                    media_type = value.split(b';', 1)[0].strip().lower()
                    if media_type in self.fix_content_types:
                        need_fix = True
            return response.getcode(), relayed, need_fix, response.read()
        finally:
            response.close()

    def fix_content(self, data):
        """Return the HTML document *data* with "™" appended to six-character words.

        Only tags whose ``.string`` is set (a single text child, possibly
        nested) are considered, and text inside ``ignore_tags`` is left
        alone.  The edit is applied to the parsed tree rather than to the raw
        bytes, so tag names, attributes and entities cannot be corrupted.
        When nothing matches, *data* is returned unchanged; otherwise the
        document is re-serialized in its detected encoding (UTF-8 when
        unknown), which may normalize the original markup.
        """
        soup = BeautifulSoup(data, "html.parser")
        # Keyed by node identity: a parent whose only child is another tag
        # reports that child's string, so the same node can show up twice.
        pending = {}
        for tag in soup.find_all():
            text = tag.string
            if text is None or tag.name in self.ignore_tags:
                continue
            # The string may belong to a nested child; honour ignore_tags
            # for the tag that actually owns it as well.
            if text.parent is not None and text.parent.name in self.ignore_tags:
                continue
            if text.strip() == "":
                continue
            fixed = self.word_re.sub("\\1™", text)
            if fixed != text:
                pending[id(text)] = (text, fixed)
        if not pending:
            return data
        for text, fixed in pending.values():
            # Rebuild with the node's own class so comments stay comments.
            text.replace_with(text.__class__(fixed))
        return soup.encode(soup.original_encoding or "utf-8")
def debug(s):
    """Print *s*, but only while the module-level DEBUG flag is truthy."""
    if not DEBUG:
        return
    print(s)
def shutdown():
    """Announce termination and stop the process by raising SystemExit."""
    print("Quit...")
    # sys.exit() instead of quit(): quit() is injected by the site module for
    # interactive sessions and is missing when Python runs with -S.
    sys.exit()
def main():
    """Start the forking proxy server on PORT and serve requests until stopped.

    If the listening socket cannot be created, the error is reported and the
    process exits through shutdown().
    """
    print('Starting...')
    try:
        httpd = SocketServer.ForkingTCPServer(('', PORT), Proxy)
    except (IOError, OSError) as err:
        # Bind/listen failures surface as socket.error, an IOError subclass.
        # A bare except would also swallow KeyboardInterrupt and SystemExit.
        print("Cant start: {}".format(err))
        shutdown()
        return
    print("serving at port {}".format(PORT))
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket however the serving loop ends.
        httpd.server_close()


# Run the proxy only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment