Skip to content

Instantly share code, notes, and snippets.

@vryazanov
Created August 6, 2019 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vryazanov/4446a0a095fba5353236635dd1af4eec to your computer and use it in GitHub Desktop.
Save vryazanov/4446a0a095fba5353236635dd1af4eec to your computer and use it in GitHub Desktop.
Habr proxy
import re
import aiohttp
import aiohttp.web
import bs4
LOCAL_DOMAIN = 'http://localhost:8080/'
HABR_DOMAIN = 'https://habr.com/'
def replace_links(soup):
    """Point links that lead to Habr at the local proxy instead.

    Every tag whose ``href`` or ``xlink:href`` attribute starts with
    ``HABR_DOMAIN`` gets that prefix swapped for ``LOCAL_DOMAIN``. The tree
    is modified in place; nothing is returned.

    Args:
        soup: Parsed document (a ``bs4.BeautifulSoup`` tree).
    """
    # Escape the domain so "." is matched literally, and anchor the pattern:
    # bs4 matches attribute regexes with ``search``, so an unanchored pattern
    # would also hit Habr URLs embedded inside links to other sites
    # (e.g. "share this page" URLs) and rewrite them to localhost.
    habr_re = re.compile('^' + re.escape(HABR_DOMAIN))
    prefix_len = len(HABR_DOMAIN)
    for attr_name in ('href', 'xlink:href'):
        for tag in soup.find_all(attrs={attr_name: habr_re}):
            # Replace only the leading domain; the rest of the URL is kept
            # untouched.
            tag[attr_name] = LOCAL_DOMAIN + tag[attr_name][prefix_len:]
# A run of exactly six letters (Latin or Cyrillic) between word boundaries.
# "ё" is listed explicitly because it lies outside the contiguous "а-я"
# range; IGNORECASE extends the class to the upper-case letters.
_SIX_LETTER_WORD_RE = re.compile(r'\b([а-яёa-z]{6})\b', flags=re.IGNORECASE)


def replace_words(soup):
    """Append a trademark sign to every six-letter word in the visible text.

    Text inside ``<script>`` and ``<style>`` elements is left alone, as are
    non-content strings such as the doctype, comments and CDATA sections.
    The tree is modified in place; nothing is returned.

    Args:
        soup: Parsed document (a ``bs4.BeautifulSoup`` tree).
    """
    exclude_tags = ['script', 'style']
    # find_all() returns a list, so replacing nodes while looping is safe.
    for node in soup.find_all(string=True):
        # Doctype, Comment, CData, Declaration and ProcessingInstruction all
        # derive from PreformattedString. Rewriting them would corrupt the
        # markup, and replace_with(str) would demote them to plain text.
        if isinstance(node, bs4.element.PreformattedString):
            continue
        # A single lookup covers every excluded ancestor tag.
        if node.find_parent(exclude_tags):
            continue
        marked = _SIX_LETTER_WORD_RE.sub(r'\1™', node)
        if marked != node:
            node.replace_with(marked)
async def proxy(request):
    """Fetch the requested page from Habr and relay it to the client.

    HTML responses are parsed, their Habr links are redirected to the local
    proxy and six-letter words get a trademark sign; any other content type
    is passed through unchanged.

    Args:
        request: Incoming ``aiohttp.web.Request``; the ``path`` route
            variable and the query string select the upstream URL.

    Returns:
        ``aiohttp.web.Response`` carrying the upstream status code and
        content type.
    """
    path = request.match_info.get('path')
    # NOTE: a new client session is opened for every request; sharing one
    # session per application would allow connection reuse.
    async with aiohttp.ClientSession() as session:
        # Forward the query string too, otherwise pages selected by
        # parameters (search, pagination) resolve to the wrong upstream URL.
        async with session.get(
            f'{HABR_DOMAIN}{path}', params=request.query,
        ) as resp:
            content = await resp.read()
            status = resp.status
            content_type = resp.content_type
            charset = resp.charset

    if content_type == 'text/html':
        soup = bs4.BeautifulSoup(content, 'html.parser')
        replace_links(soup)
        replace_words(soup)
        # Serialize without prettify(): it inserts whitespace and newlines
        # that alter how the page renders. Encode explicitly so the declared
        # charset always matches the bytes that are sent.
        body = soup.encode('utf-8')
        charset = 'utf-8'
    else:
        body = content

    # Relay the upstream status so 404s and other errors are not turned
    # into 200 responses.
    return aiohttp.web.Response(
        body=body,
        status=status,
        content_type=content_type,
        charset=charset,
    )
# Application object with a single catch-all GET route: every path is
# handed to the proxy handler.
app = aiohttp.web.Application()
app.add_routes([
    aiohttp.web.get('/{path:.*}', proxy),
])

# Start the server only when the file is executed as a script, so importing
# the module (for tests or for another runner) does not block on run_app().
if __name__ == '__main__':
    aiohttp.web.run_app(app)
@vryazanov
Copy link
Author

How to start:

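  1. Install the dependencies: pip install aiohttp beautifulsoup4 (beautifulsoup4 is the package that provides the bs4 module)
  2. Save the code above as server.py and run python server.py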
  3. Open http://localhost:8080 in your browser

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment