Skip to content

Instantly share code, notes, and snippets.

@sio
Last active October 22, 2019 07:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sio/5c83f2d409ed448207e3684f32f163b9 to your computer and use it in GitHub Desktop.
Save sio/5c83f2d409ed448207e3684f32f163b9 to your computer and use it in GitHub Desktop.
Rate limit access to sensitive resources | Development moved to https://github.com/sio/scrapehelper
'''
Rate limit access to sensitive resources
'''
# Copyright 2019 Vitaly Potyarkin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import threading
import time
from functools import wraps
class RateLimitReachedError(RuntimeError):
    '''
    Raised when rate limit is reached

    Signals that a new call could not be registered because the call log
    already holds the maximum number of calls allowed for the current time
    period.
    '''
class RateLimiter:
    '''
    Honor rate limits when using third party API

    Timestamps of registered calls are kept in a call log. A new call is
    registered only while the log holds fewer than `calls` entries; entries
    older than `interval` seconds are dropped before each attempt.

    RateLimiter objects can be used in any of three ways:
        - Through decorator interface
        - Through context manager interface
        - Manually registering new_call() before sending an API request
    '''
    # TODO: write tests for RateLimiter

    REFRESH_INTERVAL = 0.5  # seconds (minimum sleep interval)

    def __init__(self, calls=15, interval=15*60, wait=True):
        '''
        Create a new RateLimiter object

        calls:
            Maximum number of API calls allowed within time period
        interval:
            The time period (in seconds) for which the number of calls is
            limited
        wait:
            Boolean. If True the RateLimiter will wait until limit terms are
            satisfied before registering a new call. Will raise
            RateLimitReachedError otherwise.
        '''
        self.call_limit = calls
        self.call_log = []  # timestamps of registered calls, oldest first
        self.interval = interval
        self.wait = wait
        self.clock = time.monotonic
        self.lock = threading.RLock()
        self.next_cleanup = 0  # 0 means no cleanup is scheduled

    def new_call(self):
        '''
        Register new API call

        If self.wait is True, wait until rate limit terms are satisfied
        (sleeping at least REFRESH_INTERVAL seconds between attempts).
        Raise RateLimitReachedError otherwise.
        '''
        while True:
            try:
                self._call_attempt()
            except RateLimitReachedError:
                if not self.wait:
                    # Bare raise keeps the original traceback intact
                    raise
                # Sleep until the oldest entry is due to expire, but never
                # less than the minimum refresh interval
                time.sleep(max(
                    self.REFRESH_INTERVAL,
                    self.next_cleanup - self.clock()
                ))
            else:
                return

    def _call_attempt(self):
        '''
        Try to add a new item to the call log.

        Raises RateLimitReachedError if the log is full.
        '''
        with self.lock:
            self._cleanup()
            if len(self.call_log) >= self.call_limit:
                raise RateLimitReachedError(
                    'can not make more than {num} calls in {interval} seconds'.format(
                        num=self.call_limit,
                        interval=self.interval,
                    )
                )
            self.call_log.append(self.clock())

    def _cleanup(self):
        '''
        Maintain the call log: remove obsolete entries, schedule next cleanup

        Called by _call_attempt() while self.lock is held. After it returns,
        self.next_cleanup is the moment the oldest remaining entry expires,
        or 0 when the log is empty.
        '''
        log = self.call_log
        # Entries are appended in chronological order, so only the head of
        # the log can be obsolete
        while log and self.clock() > log[0] + self.interval:
            log.pop(0)
        self.next_cleanup = log[0] + self.interval if log else 0

    def __enter__(self):
        '''
        Context manager interface for RateLimiter

        Registers a new call on entry and returns the limiter itself so that
        `with limiter as x:` binds a usable object.
        '''
        self.new_call()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''
        Context manager interface for RateLimiter

        Returns False: exceptions raised inside the with-block propagate.
        '''
        return False

    def __call__(self, function):
        '''
        Function decorator interface for RateLimiter

        Returns a wrapper that registers a new call before each invocation
        of the decorated function.
        '''
        @wraps(function)
        def decorated(*a, **kw):
            self.new_call()
            return function(*a, **kw)
        return decorated
@jsbohnert
Copy link

Extremely grateful that you went to the effort to implement this alternative, as I was preparing to do something similar after noticing the same issues with the source project. Thank you.

@sio
Copy link
Author

sio commented Oct 22, 2019

You're welcome! :-)

After I posted this gist I've included its code into my scrapehelper package, installable with pip (though not on PyPI): https://github.com/sio/scrapehelper/blob/master/scrapehelper/limit.py

Currently the code is mostly identical (only one property was added: remaining), but all further changes will happen there.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment