vryazanov/server.py

## server.py
import re

import aiohttp
import aiohttp.web
import bs4


# Base URL substituted into rewritten links so they point at this proxy.
LOCAL_DOMAIN = 'http://localhost:8080/'
# Base URL of the upstream site whose pages are fetched and rewritten.
HABR_DOMAIN = 'https://habr.com/'


def replace_links(soup):
    """Point links that reference Habr at the local proxy instead.

    Every tag whose ``href`` or ``xlink:href`` attribute contains
    ``HABR_DOMAIN`` is updated in place: each occurrence of that domain in
    the attribute value is replaced with ``LOCAL_DOMAIN``.

    Args:
        soup: Parsed ``bs4.BeautifulSoup`` document; modified in place.
    """
    # Escape the domain so the dots in it match only literal dots; left
    # unescaped, "habr.com" would also match strings such as "habrXcom".
    # No trailing ".*" is needed because bs4 filters with pattern.search().
    habr_re = re.compile(re.escape(HABR_DOMAIN))
    for attr_name in ('href', 'xlink:href'):
        for tag in soup.find_all(attrs={attr_name: habr_re}):
            tag[attr_name] = tag[attr_name].replace(HABR_DOMAIN, LOCAL_DOMAIN)


def replace_words(soup):
    """Append a trademark sign to every six-letter word in the page text.

    Each text node containing a standalone word of exactly six Latin or
    Cyrillic letters is replaced with a copy in which every such word is
    followed by "™".  Text inside ``<script>`` and ``<style>`` is left
    untouched, and so are comments, doctypes and other special nodes.

    Args:
        soup: Parsed ``bs4.BeautifulSoup`` document; modified in place.
    """
    exclude_tags = ('script', 'style')
    # "ё" lies outside the "а-я" range, so it has to be listed explicitly.
    six_letter_word = re.compile(r'\b([а-яёa-z]{6})\b', flags=re.IGNORECASE)

    for node in soup.find_all(text=True):
        # Comments, doctypes, CDATA and similar nodes must keep their type:
        # swapping one for a plain string would turn it into page text.
        if isinstance(node, bs4.element.PreformattedString):
            continue
        # A single walk over the ancestors covers all excluded tags.
        if any(parent.name in exclude_tags for parent in node.parents):
            continue

        marked = six_letter_word.sub(r'\1™', node)
        if marked != node:
            node.replace_with(marked)


async def proxy(request):
    """Fetch the requested page from Habr and return it, rewritten if HTML.

    The request path and query string are forwarded to ``HABR_DOMAIN``.
    ``text/html`` responses are parsed, have their Habr links pointed at
    the local proxy and their six-letter words marked; any other content
    type is passed through unchanged.  The upstream status code is kept.

    Args:
        request: Incoming ``aiohttp.web.Request``; the ``path`` match-info
            entry and the query parameters are read from it.

    Returns:
        An ``aiohttp.web.Response`` with the upstream or rewritten body.
    """
    path = request.match_info.get('path')
    async with aiohttp.ClientSession() as session:
        # match_info only holds the path, so pass the query along as well.
        async with session.get(
            f'{HABR_DOMAIN}{path}', params=request.query,
        ) as resp:
            content = await resp.read()
            content_type, status = resp.content_type, resp.status

    if content_type != 'text/html':
        return aiohttp.web.Response(
            body=content, content_type=content_type, status=status,
        )

    soup = bs4.BeautifulSoup(content, 'html.parser')
    replace_links(soup)
    replace_words(soup)
    # str() rather than prettify(): prettify() inserts whitespace, which can
    # change how the page renders.  Passing text= makes aiohttp encode the
    # markup and declare the charset in the Content-Type header.
    return aiohttp.web.Response(
        text=str(soup), content_type=content_type, status=status,
    )


# Route every GET request, whatever its path, to the proxy handler.
app = aiohttp.web.Application()
app.add_routes([
    aiohttp.web.get('/{path:.*}', proxy),
])

# Start the server only when executed as a script, so that importing this
# module (for example to reuse ``app``) does not block on the event loop.
if __name__ == '__main__':
    aiohttp.web.run_app(app)
	import re

	import aiohttp
	import aiohttp.web
	import bs4


	# Base URL substituted into rewritten links so they point at this proxy.
	LOCAL_DOMAIN = 'http://localhost:8080/'
	# Base URL of the upstream site whose pages are fetched and rewritten.
	HABR_DOMAIN = 'https://habr.com/'


	def replace_links(soup):
	    """Point links that reference Habr at the local proxy instead.

	    Every tag whose ``href`` or ``xlink:href`` attribute contains
	    ``HABR_DOMAIN`` is updated in place: each occurrence of that domain in
	    the attribute value is replaced with ``LOCAL_DOMAIN``.

	    Args:
	        soup: Parsed ``bs4.BeautifulSoup`` document; modified in place.
	    """
	    # Escape the domain so the dots in it match only literal dots; left
	    # unescaped, "habr.com" would also match strings such as "habrXcom".
	    # No trailing ".*" is needed because bs4 filters with pattern.search().
	    habr_re = re.compile(re.escape(HABR_DOMAIN))
	    for attr_name in ('href', 'xlink:href'):
	        for tag in soup.find_all(attrs={attr_name: habr_re}):
	            tag[attr_name] = tag[attr_name].replace(HABR_DOMAIN, LOCAL_DOMAIN)


	def replace_words(soup):
	    """Append a trademark sign to every six-letter word in the page text.

	    Each text node containing a standalone word of exactly six Latin or
	    Cyrillic letters is replaced with a copy in which every such word is
	    followed by "™".  Text inside ``<script>`` and ``<style>`` is left
	    untouched, and so are comments, doctypes and other special nodes.

	    Args:
	        soup: Parsed ``bs4.BeautifulSoup`` document; modified in place.
	    """
	    exclude_tags = ('script', 'style')
	    # "ё" lies outside the "а-я" range, so it has to be listed explicitly.
	    six_letter_word = re.compile(r'\b([а-яёa-z]{6})\b', flags=re.IGNORECASE)

	    for node in soup.find_all(text=True):
	        # Comments, doctypes, CDATA and similar nodes must keep their type:
	        # swapping one for a plain string would turn it into page text.
	        if isinstance(node, bs4.element.PreformattedString):
	            continue
	        # A single walk over the ancestors covers all excluded tags.
	        if any(parent.name in exclude_tags for parent in node.parents):
	            continue

	        marked = six_letter_word.sub(r'\1™', node)
	        if marked != node:
	            node.replace_with(marked)


	async def proxy(request):
	    """Fetch the requested page from Habr and return it, rewritten if HTML.

	    The request path and query string are forwarded to ``HABR_DOMAIN``.
	    ``text/html`` responses are parsed, have their Habr links pointed at
	    the local proxy and their six-letter words marked; any other content
	    type is passed through unchanged.  The upstream status code is kept.

	    Args:
	        request: Incoming ``aiohttp.web.Request``; the ``path`` match-info
	            entry and the query parameters are read from it.

	    Returns:
	        An ``aiohttp.web.Response`` with the upstream or rewritten body.
	    """
	    path = request.match_info.get('path')
	    async with aiohttp.ClientSession() as session:
	        # match_info only holds the path, so pass the query along as well.
	        async with session.get(
	            f'{HABR_DOMAIN}{path}', params=request.query,
	        ) as resp:
	            content = await resp.read()
	            content_type, status = resp.content_type, resp.status

	    if content_type != 'text/html':
	        return aiohttp.web.Response(
	            body=content, content_type=content_type, status=status,
	        )

	    soup = bs4.BeautifulSoup(content, 'html.parser')
	    replace_links(soup)
	    replace_words(soup)
	    # str() rather than prettify(): prettify() inserts whitespace, which can
	    # change how the page renders.  Passing text= makes aiohttp encode the
	    # markup and declare the charset in the Content-Type header.
	    return aiohttp.web.Response(
	        text=str(soup), content_type=content_type, status=status,
	    )


	# Route every GET request, whatever its path, to the proxy handler.
	app = aiohttp.web.Application()
	app.add_routes([
	    aiohttp.web.get('/{path:.*}', proxy),
	])

	# Start the server only when executed as a script, so that importing this
	# module (for example to reuse ``app``) does not block on the event loop.
	if __name__ == '__main__':
	    aiohttp.web.run_app(app)