Skip to content

Instantly share code, notes, and snippets.

@1844144
Last active January 8, 2016 13:42
Show Gist options
  • Save 1844144/96a34b792da1d1f3a7cc to your computer and use it in GitHub Desktop.
Save 1844144/96a34b792da1d1f3a7cc to your computer and use it in GitHub Desktop.
Add TM
import argparse
import subprocess
import os
import sys
import BaseHTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler
import requests
from lxml import etree
from StringIO import StringIO
def add_tm(text, word_length=6, mark=u'\u2122'):
    """Return a copy of *text* with *mark* appended to every word of
    exactly *word_length* letters.

    A "word" is a maximal run of characters for which ``isalpha()`` is
    true; any other character (space, digit, punctuation, ...) ends the run.

    text        -- string to scan; it is not modified.
    word_length -- run length that triggers the mark (default 6).
    mark        -- string inserted right after a matching run
                   (default: the trade mark sign, U+2122).
    """
    pieces = []
    run = 0
    for char in text:
        if char.isalpha():
            run += 1
        else:
            # a non-letter closes the current run; mark it if it matched
            if run == word_length:
                pieces.append(mark)
            run = 0
        pieces.append(char)
    # the text may end in the middle of a run, so check the last one too
    if run == word_length:
        pieces.append(mark)
    # join once instead of growing a string with += on every character
    return "".join(pieces)
def process_line(line, encoding):
    """Parse an HTML page, run add_tm() over its text and serialize it back.

    line     -- the HTML document, as accepted by StringIO().
    encoding -- encoding passed to etree.tostring() for the result.

    Text is rewritten on element 'end' events. The content of <script>
    and <style> elements is code, not prose, so it is left untouched;
    the text that follows their closing tag is processed normally.
    """
    skipped_tags = ('script', 'style')
    # > 0 while the parser is inside a <script>/<style> element
    skip_depth = 0
    tree = etree.iterparse(StringIO(line), events=('start', 'end'), html=True)
    for event, element in tree:
        is_skipped = element.tag in skipped_tags
        if event == 'start':
            if is_skipped:
                skip_depth += 1
            continue
        # event == 'end'
        if is_skipped:
            # leaving the element: its own text stays as is, but its tail
            # (handled below) is ordinary page text again
            skip_depth -= 1
        elif skip_depth == 0 and element.text and len(element.text) >= 6:
            element.text = add_tm(element.text)
        # tail is text after tag closing (before next tag)
        if skip_depth == 0 and element.tail and len(element.tail) >= 6:
            element.tail = add_tm(element.tail)
    return etree.tostring(tree.root, method='html', encoding=encoding)
class MyHttpHandler(SimpleHTTPRequestHandler):
    """Request handler that proxies GET requests to `site` over plain HTTP
    and passes text/html responses through process_line()."""

    # Upstream host name (without the http:// prefix); set before serving.
    site = ''
    # Seconds to wait for the upstream server. Deliberately not called
    # `timeout`, so it cannot collide with an attribute of the base handler.
    upstream_timeout = 30

    def do_GET(self):
        """Serve a GET request."""
        url = 'http://' + self.site + self.path
        try:
            r = requests.get(url, timeout=self.upstream_timeout)
        except requests.RequestException:
            # upstream unreachable or timed out: answer instead of crashing
            self.send_error(502, 'Upstream request failed')
            return

        # the header may be absent; treat that as "not html"
        content_type = r.headers.get('content-type', '')
        if 'text/html' in content_type:
            body = process_line(r.content, r.encoding)
        else:
            body = r.content

        # set needed headers
        self.send_response(r.status_code)
        self.send_header('content-length', str(len(body)))
        if content_type:
            self.send_header('content-type', content_type)
        self.end_headers()
        self.wfile.write(body)
if __name__ == '__main__':
    # Command-line entry point: start the proxy and open it in a browser.
    parser = argparse.ArgumentParser(
        description=u'Add \u2122 to each 6-character word on page.')
    parser.add_argument('-p', '--port', default=8000,
                        type=int, help='local port, 8000')
    parser.add_argument('-H', '--host', default='localhost',
                        type=str, help='hostname, localhost')
    parser.add_argument('-s', '--site', default='habrahabr.ru',
                        type=str, help='site (without http://) , habrahabr.ru')
    args = parser.parse_args()

    MyHttpHandler.site = args.site
    # Create the server before launching the browser: the constructor binds
    # and starts listening, so the browser's first request is not refused.
    server = BaseHTTPServer.HTTPServer(
        (args.host, args.port), MyHttpHandler)

    # single-argument call form works as both a py2 statement and py3 function
    print("Launching proxy on {}:{}, opening {}".format(
        args.host, args.port, args.site))

    # the default site opens on a specific article instead of the front page
    if args.site == 'habrahabr.ru':
        page = 'company/yandex/blog/258673'
    else:
        page = ''
    url = 'http://{}:{}/{}'.format(args.host, args.port, page)
    try:
        subprocess.Popen(['xdg-open', url])
    except OSError:
        # xdg-open is not available; the proxy itself is still usable
        print("Could not run xdg-open, open {} manually".format(url))

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        server.server_close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment