This little snippet can be used to keep bots occupied, by sending them random data very slowly.
It will not block them, but it will slow them down.
To add a tarpit to your Flask application, you need two parts:
some logic to detect bot/malicious requests, and this snippet.
The detection logic depends on your app structure, so you have to implement it yourself.
For the tarpit part, here is an example that sends every request for an undefined endpoint (404) to the tarpit.
First, add these two classes to your code (here they are added to Utils.py):
import time
from io import BytesIO, RawIOBase
from typing import BinaryIO, Optional, Callable
class LoopFile(RawIOBase, BinaryIO):
    """Binary stream that serves a heading once and then loops forever.

    ``read`` first hands out ``heading_data``; once that is used up it
    keeps returning ``loop_data`` over and over, so the stream never
    signals end-of-file as long as ``loop_data`` is non-empty.
    """

    def __init__(self, heading_data: bytes, loop_data: bytes) -> None:
        """Wrap both payloads in in-memory buffers.

        Args:
            heading_data: Bytes served exactly once, before anything else.
            loop_data: Bytes served repeatedly after the heading.
        """
        self.heading_file = BytesIO(heading_data)
        self.loop_file = BytesIO(loop_data)

    def _loop_read(self, n: Optional[int] = None) -> bytes:
        """Return up to ``n`` bytes of loop data, rewinding at its end.

        A chunk can be shorter than ``n`` when the end of the loop data is
        reached; the following call starts again from the beginning.
        """
        data = self.loop_file.read(n)
        if not data:
            # End of the loop payload: rewind and read again.
            self.loop_file.seek(0)
            data = self.loop_file.read(n)
        return data

    def read(self, size: Optional[int] = None) -> bytes:
        """Return the next chunk of the stream.

        Args:
            size: Maximum number of bytes to return; ``None`` reads
                whatever remains of the current payload.

        Returns:
            Heading bytes while any are left, loop bytes afterwards.
        """
        if size == 0:
            # A zero-byte read yields b'' from the heading buffer, which
            # would otherwise be mistaken for "heading exhausted" and make
            # the stream skip the heading entirely.
            return b''
        if not self.heading_file.closed:
            data = self.heading_file.read(size)
            if data:
                return data
            # The heading is used up; closing it marks the switch to the loop.
            self.heading_file.close()
        return self._loop_read(size)
class SlowLoopFile(LoopFile):
    """LoopFile whose reads can be delayed and limited in size.

    While fewer than ``fast_bytes`` bytes have been handed out, reads pass
    through untouched.  After that every read sleeps for ``read_delay``
    seconds (if set) and returns at most ``max_read_size`` bytes (if set).
    An optional callback receives the elapsed time at regular intervals.
    """

    def __init__(self, heading_data: bytes, loop_data: bytes,
                 logger: Optional[Callable[[float], None]] = None,
                 log_frequency: Optional[float] = None,
                 read_delay: Optional[float] = None,
                 max_read_size: Optional[int] = None,
                 fast_bytes: Optional[int] = None) -> None:
        """Configure the throttling and the optional progress callback.

        Args:
            heading_data: Bytes served once at the start of the stream.
            loop_data: Bytes repeated after the heading.
            logger: Callback invoked with the seconds elapsed since
                construction; ``None`` disables progress reporting.
            log_frequency: Minimum number of seconds between two callback
                invocations; required when ``logger`` is given.
            read_delay: Seconds to sleep before each throttled read;
                ``None`` or ``0`` disables the delay.
            max_read_size: Upper bound on the bytes returned by a
                throttled read; ``None`` or ``0`` leaves reads uncapped.
            fast_bytes: Reads are not throttled until this many bytes have
                been handed out; ``None`` or ``0`` throttles from the start.

        Raises:
            ValueError: If ``logger`` is given without a ``log_frequency``.
        """
        super().__init__(heading_data, loop_data)
        if logger and not log_frequency:
            raise ValueError('log_frequency is required when a logger is given')
        self.read_delay = read_delay
        self.max_read_size = max_read_size
        self.fast_bytes = fast_bytes
        self.processed_bytes = 0
        self.start = time.time()
        self.last_log = time.time()
        self.logger = logger
        self.log_frequency = log_frequency

    def read(self, size: Optional[int] = None) -> bytes:
        """Return the next chunk, throttled once the fast phase is over.

        Args:
            size: Maximum number of bytes requested by the caller.

        Returns:
            The bytes read; ``b''`` if the underlying read yields nothing.
        """
        if self.logger:
            current_time = time.time()
            if current_time - self.last_log >= self.log_frequency:
                self.last_log = current_time
                self.logger(current_time - self.start)

        in_fast_phase = bool(self.fast_bytes) and self.processed_bytes < self.fast_bytes
        if not in_fast_phase:
            if self.max_read_size:
                # Cap the chunk size, but never return more than the
                # caller asked for.
                if size is None or size < 0:
                    size = self.max_read_size
                else:
                    size = min(size, self.max_read_size)
            if self.read_delay:
                # read_delay defaults to None, which time.sleep() rejects.
                time.sleep(self.read_delay)

        data = super().read(size) or b''
        self.processed_bytes += len(data)
        return data
Next, add a custom 404 template (as a string or as an .html file). Here it is defined as a string to reduce disk I/O.
# Heading of the tarpit response. The trailing <p> is left open so that the
# data streamed after this heading ends up inside that paragraph.
NOT_FOUND_HTML_FILE = (
    "<h1>Oops! Looks like you are trying to request an not existing page.</h1>\n"
    "<p>If you are not a bot, you may want to abort the page loading now.</p>\n"
    "<p>If you are a bot, have fun.</p>\n"
    "<p>"
)
Then create a function to handle page-not-found events.
def page_not_found(e):
    """Answer a page-not-found error with a slow, never-ending response.

    Args:
        e: The error handed over by Flask; it is not inspected.

    Returns:
        The response created by ``flask.send_file`` around a
        ``SlowLoopFile`` that serves the custom 404 heading followed by
        random data.
    """
    import random
    import string

    from flask import send_file, request, current_app
    from Utils import SlowLoopFile

    # Gather request info for logging. The values are copied into locals so
    # the progress callback below only uses these captured values.
    path = request.path
    flask_logger = current_app.logger
    request_remote_addr = request.remote_addr
    flask_logger.info('trapping {} for requested path {}'.format(request_remote_addr, path))

    # build random data
    random_string_len = 10
    random_string = ''.join(
        random.choices(string.ascii_uppercase + string.digits, k=random_string_len)
    ).encode('utf-8')

    # Encode the heading once; its byte length is also the amount of data
    # that is delivered at full speed before throttling starts.
    heading = NOT_FOUND_HTML_FILE.encode('utf-8')

    # Instantiate a SlowLoopFile; pass logger=None to disable the periodic
    # progress log.
    slow_file = SlowLoopFile(heading_data=heading,
                             loop_data=random_string,
                             logger=lambda duration: flask_logger.info(
                                 'trapping {} for requested path {} for {} seconds'.format(
                                     request_remote_addr, path, int(duration))),
                             log_frequency=30.0,
                             read_delay=10,
                             max_read_size=10,
                             fast_bytes=len(heading))

    # NOTE(review): a response object returned from an error handler appears
    # to keep its own status code (200 from send_file) unless a status is
    # returned explicitly - confirm which status is intended here.
    return send_file(slow_file, mimetype='text/html')
Finally, register it on your Flask app.
def create_app():
    """Build the Flask application and hook up the tarpit 404 handler."""
    from flask import Flask

    application = Flask(__name__)
    ...
    # Every page-not-found error is routed to the tarpit handler.
    application.register_error_handler(404, page_not_found)
    ...
    return application
And you are done.
Requests for unknown pages will now get the custom HTML response. Humans will read that they should abort the page load.
Bots will read endless random data from an open <p> tag.
If bots abort instantly because of the 404 response, change the send_file line to this:
return send_file(slow_file, mimetype='text/html'), 200
Now a Page-Not-Found error will produce the 'wrong' HTTP response code. This defeats the purpose of a 404-page, but it will work even better on bots.
You will be happy, because you slowed down a bot and the bot will be happy, because it receives some data :)