Skip to content

Instantly share code, notes, and snippets.

@kafeg
Created September 1, 2015 05:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kafeg/3073285e97772495b718 to your computer and use it in GitHub Desktop.
Save kafeg/3073285e97772495b718 to your computer and use it in GitHub Desktop.
Simple Python proxy
# demo: http://forsk.ru:8232/company/yandex/blog/258673/
# pip install html5lib beautifulsoup4
# -*- coding: utf-8 -*-
import BaseHTTPServer
import requests
import string
from bs4 import BeautifulSoup
def main():
    """Run the proxy server on all interfaces, port 8232, until interrupted.

    Blocks inside ``serve_forever``.  The listening socket is always closed
    on the way out (Ctrl-C or any other exception) so the port is released
    instead of lingering until the process is torn down.
    """
    httpd = BaseHTTPServer.HTTPServer(('0.0.0.0', 8232), TMHandler)
    try:
        httpd.serve_forever()
    finally:
        # Release the listening socket even when serve_forever() is aborted.
        httpd.server_close()
class TMHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler that proxies GET requests to ``site``.

    Each request path is fetched from ``http://<site><path>``.  Absolute
    links to the upstream site are stripped from the body so they resolve
    against this proxy, and HTML responses are additionally passed through
    ``parsehtml`` before being returned to the client.
    """

    # Upstream host whose pages are fetched and rewritten.
    site = 'habrahabr.ru'

    # Seconds to wait for the upstream server.  requests has no default
    # timeout, and a stalled upstream would otherwise block this
    # single-threaded server forever.  (Deliberately not named ``timeout``:
    # the base request handler already uses that attribute for the client
    # socket.)
    upstream_timeout = 30

    def do_GET(self):
        """Fetch the requested path upstream and relay the response.

        Replies with 502 when the upstream request fails.  Otherwise the
        upstream status code and content type are forwarded; HTML bodies
        are rewritten by ``parsehtml`` and declared as UTF-8, since that is
        the encoding ``parsehtml`` returns.
        """
        url = 'http://' + self.site + self.path
        try:
            req = requests.get(url, timeout=self.upstream_timeout)
        except requests.RequestException:
            # Upstream unreachable / timed out: report a gateway error
            # rather than dropping the connection with a traceback.
            self.send_error(502)
            return

        # Make absolute links to the upstream site relative to this proxy.
        content = req.content.replace('http://' + self.site, '')
        # The header may be missing; treat that as "not HTML".
        content_type = req.headers.get('content-type', '')

        # Transform the body *before* any header is written, so a failure
        # here cannot leave a half-sent response and the advertised charset
        # matches the bytes that are actually sent.
        if content_type.startswith('text/html'):
            content = parsehtml(content)
            content_type = 'text/html; charset=utf-8'

        # Forward the real upstream status instead of a blanket 200.
        self.send_response(req.status_code)
        if content_type:
            self.send_header('Content-type', content_type)
        self.end_headers()
        self.wfile.write(content)
def parsehtml(html):
    """Return *html* with every visible text node passed through ``transform``.

    The markup is parsed with html5lib.  Text whose parent is one of the
    non-content elements in ``skip`` is left alone, as are HTML comments.
    Whitespace-only text nodes are left untouched.  The result is the
    prettified document encoded as UTF-8 bytes.
    """
    # Local import: the module only imports the BeautifulSoup name from bs4.
    from bs4 import Comment

    skip = ('style', 'script', '[document]', 'head', 'title')
    doc = BeautifulSoup(html, 'html5lib')
    for node in doc.find_all(text=True):
        # find_all(text=True) also yields Comment nodes; replacing one with
        # a plain string would turn the comment into visible page text.
        if isinstance(node, Comment):
            continue
        if node.parent.name in skip:
            continue
        s = node.strip()
        if s:
            node.replace_with(transform(s))
    return doc.prettify().encode('utf-8')
def isTMWord(s):
    """Tell whether *s* should receive a trade mark sign.

    A token qualifies when it is exactly six characters long and does not
    end with a comma or a full stop.
    """
    trailing_punctuation = (',', '.')
    return not s.endswith(trailing_punctuation) and len(s) == 6
def transform(s):
    """Return *s* with a trade mark sign (U+2122) after each qualifying word.

    *s* is split on whitespace, every token accepted by ``isTMWord`` gets
    the sign appended, and the tokens are re-joined with single spaces (so
    runs of whitespace in the input collapse to one space).
    """
    # Spelled as an escape rather than a literal character so this line
    # stays pure ASCII and does not depend on the file's source-encoding
    # declaration being honoured.
    tm_sign = u'\u2122'
    return u' '.join(x + tm_sign if isTMWord(x) else x for x in s.split())
# Start the proxy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment