Counter-based rate limiter class for Java.
package coil.geek;
import java.util.concurrent.atomic.AtomicLong;
/**
* A token-based rate limiter, inspired by https://stackoverflow.com/a/55349487/53538
*
* The original implementation boasts O(1) time complexity as well as O(1) space complexity, but controls
* concurrency using synchronization. This implementation uses atomic operations to offer lock-less concurrency
* and reentrancy.
*
* This implementation also attempts to improve on the original in the following aspects:
* 1. Allow lock-less concurrent access - the call to test the rate will never block on concurrent access.
* 2. Somewhat faster, by doing fewer calculations for each call.
* 3. Allow differential costs - different calls can cost a different number of tokens.
* 4. In addition to the yes/no API (implemented by {@link #allowRequest()}), there is
* an API for callers that can delay the request internally to facilitate the rate limit:
* it tells the caller how long to wait, and the caller can either check again after that
* delay or just pre-consume the rate token.
* 5. For use cases that throttle performance by delaying responses, the caller can make a single
* call into {@link #whenNextAllowed(int, boolean)}, get a delay value and commit to performing
* the request - by specifying {@code true} in the second parameter, the token pool can be put
* into a deficit, so a request rate higher than the pool fill rate simply produces larger and
* larger delay values (see the sketch below this list).
* 6. Allow a fill rate that is different from the limit, using the {@link #RateLimiter(int, int, long)}
* constructor: the first argument is the limit (burst size), and the second and third arguments
* specify the fill rate, as in the shorter constructor.
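*
* A sketch of the delay-and-commit pattern from item 5 above, given a {@code limiter} instance
* ({@code scheduler} and {@code sendReply} are placeholders for the caller's own scheduling and
* response logic):
* <pre>{@code
* long delayMs = limiter.whenNextAllowed(1, true); // commit: the token is consumed even if we must wait
* scheduler.schedule(this::sendReply, delayMs, TimeUnit.MILLISECONDS);
* }</pre>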
*
* The other main difference between this and the original is that in order to use CAS to implement
* lock-less concurrency, this implementation dropped the arbitrary precision and instead uses whole
* nanoseconds to measure the passage of time. The interval is still specified in milliseconds - which
* means that the amount of available tokens is measured in millionths of a token (affectionately
* nicknamed "tockins" here), giving us 6 digits of precision - which we believe should be enough
* for most uses.
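* For instance, with a fill of 10 tokens per 1000 ms the pool gains 0.01 tokens (10,000 tockins)
* per millisecond, so a full token becomes available every 100 ms.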
*
* For example, to create a limiter that allows a sustained rate of 10 calls per second with a
* 50-call burst, use: {@code new RateLimiter(50, 50, 10, 1000)} (the pool starts full, with 50 tokens).
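*
* A minimal usage sketch for the yes/no API ({@code handleRequest} and {@code sendTooManyRequests}
* are placeholders for the caller's own logic):
* <pre>{@code
* RateLimiter limiter = new RateLimiter(50, 50, 10, 1000);
* if (limiter.allowRequest()) {
*     handleRequest();          // allowed - a token was consumed
* } else {
*     sendTooManyRequests();    // blocked - no token was available
* }
* }</pre>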
*
* License: CC BY-SA 4.0.
* @author Oded Arbel <oded@geek.co.il>, https://stackoverflow.com/users/8781330/tonywl
*/
public class RateLimiter {
private final static long TOKEN_SIZE = 1_000_000 /* tockins per token */;
private final double tokenRate; // measured in tokens per ms
private final double tockinRate; // measured in tockins per ms
private final long tockinsLimit;
private AtomicLong available;
private AtomicLong lastTimeStamp;
/**
* Create a new rate limiter with the token fill rate specified as
* {@code limit}/{@code interval} and a maximum token pool size of {@code limit}
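* For example, {@code new RateLimiter(100, 60_000)} allows an average of 100 calls per minute,
* with bursts of up to 100 calls.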
* @param limit The maximum number of tokens in the pool
* @param interval The time for the pool to be completely filled, in ms
*/
public RateLimiter(int limit, long interval) {
this(0, limit, limit, interval);
}
/**
* Create a new rate limiter with the token fill rate specified as
* {@code fill}/{@code interval} and a maximum token pool size of {@code limit}
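* For example, {@code new RateLimiter(20, 5, 1000)} refills 5 tokens per second and allows
* bursts of up to 20 calls.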
* @param limit The maximum number of tokens in the pool (burst size)
* @param fill How many tokens will be filled in the pool by waiting {@code interval} time
* @param interval How long will it take to get {@code fill} tokens back in the pool in ms
*/
public RateLimiter(int limit, int fill, long interval) {
this(0, limit, fill, interval);
}
/**
* Create a new rate limiter with the token fill rate specified as
* {@code fill}/{@code interval} and a maximum token pool size of {@code limit}, starting
* with a {@code prefill} amount of tokens ready to be used.
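* For example, {@code new RateLimiter(20, 20, 5, 1000)} refills 5 tokens per second, allows
* bursts of up to 20 calls, and starts with a full pool of 20 tokens ready to be consumed.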
* @param prefill instead of starting with an empty pool, start with this many tokens already
* available to be consumed. This value is clamped to {@code limit}.
* @param limit The maximum number of tokens in the pool (burst size)
* @param fill How many tokens will be filled in the pool by waiting {@code interval} time
* @param interval How long will it take to get {@code fill} tokens back in the pool in ms
*/
public RateLimiter(int prefill, int limit, int fill, long interval) {
this.tokenRate = (double)fill / interval;
this.tockinsLimit = TOKEN_SIZE * limit;
this.tockinRate = tokenRate * TOKEN_SIZE;
this.lastTimeStamp = new AtomicLong(System.nanoTime());
this.available = new AtomicLong(Math.min(prefill, limit) * TOKEN_SIZE); // clamp the prefill to the pool limit
}
/**
* Check if a request is allowed through the rate limiter.
* If this call returned {@code true} then a rate token was consumed; otherwise
* the request should be blocked.
* @return whether to allow the request
*/
public boolean allowRequest() {
return whenNextAllowed(1, false) == 0;
}
/**
* Check if a request with a token cost higher than 1 is allowed through the rate limiter.
* If this call returned {@code true} then the requested number of tokens was consumed; otherwise
* the request should be blocked.
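* For example, a hypothetical bulk endpoint might charge 5 tokens per call: {@code limiter.allowRequest(5)}.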
* @param cost how many tokens to consume from the pool, if the request is allowed
* @return whether to allow the request
*/
public boolean allowRequest(int cost) {
return whenNextAllowed(cost, false) == 0;
}
/**
* Check when the next call will be allowed, according to the specified rate.
* The value returned is in milliseconds. If the result is 0 - or if {@code alwaysConsume} was
* specified - then the RateLimiter has recorded the call as allowed.
* @param alwaysConsume if set to {@code true} this method assumes that the caller will delay
* the action that is rate limited but will perform it without checking again - so it will
* consume the specified number of tokens as if the action has gone through. This means that
* the pool can get into a deficit, which will further delay additional actions.
* @return how many milliseconds before this request should be let through.
*/
public long whenNextAllowed(boolean alwaysConsume) {
return whenNextAllowed(1, alwaysConsume);
}
/**
* Check when the next call will be allowed, according to the specified rate.
* The value returned is in milliseconds. If the result is 0 - or if {@code alwaysConsume} was
* specified - then the RateLimiter has recorded the call as allowed.
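* A sketch of the check-and-retry pattern, given a {@code limiter} instance and with
* {@code alwaysConsume} left {@code false} (the cost of 2 and the blocking sleep are arbitrary
* choices for this example):
* <pre>{@code
* long wait = limiter.whenNextAllowed(2, false); // 0 means the tokens were consumed and we may proceed
* while (wait > 0) {
*     Thread.sleep(wait);                        // back off (InterruptedException handling omitted)
*     wait = limiter.whenNextAllowed(2, false);
* }
* }</pre>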
* @param cost How costly is the requested action. The base rate is 1 token per request,
* but the client can declare a more costly action that consumes more tokens.
* @param alwaysConsume if set to {@code true} this method assumes that the caller will delay
* the action that is rate limited but will perform it without checking again - so it will
* consume the specified number of tokens as if the action has gone through. This means that
* the pool can get into a deficit, which will further delay additional actions.
* @return how many milliseconds before this request should be let through.
*/
public long whenNextAllowed(int cost, boolean alwaysConsume) {
var now = System.nanoTime();
var last = lastTimeStamp.getAndSet(now);
// calculate how many tockins we got since last call
// if the previous call was less than a microsecond ago, we still accumulate at least
// one tockin, which is probably more than we should, but this is too small to matter - right?
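// note that tokenRate is measured in tokens/ms while (now - last) is in ns, so their product is
// directly in millionths of a token (tockins): the ns-to-ms factor of 1e-6 cancels TOKEN_SIZE = 1e6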
var add = (long)Math.ceil(tokenRate * (now - last));
var nowAvailable = available.addAndGet(add);
while (nowAvailable > tockinsLimit) {
available.compareAndSet(nowAvailable, tockinsLimit);
nowAvailable = available.get();
}
// answer the question
var toWait = (long)Math.ceil(Math.max(0, (cost * TOKEN_SIZE - nowAvailable) / tockinRate));
if (alwaysConsume || toWait == 0) // the caller will let the request go through, so consume the tokens now
available.addAndGet(-cost * TOKEN_SIZE);
return toWait;
}
}