Skip to content

Instantly share code, notes, and snippets.

@anirudhpillai
Created September 3, 2021 12:36
Show Gist options
  • Save anirudhpillai/81dd5604bc0ae7f7613932a4c462cc2f to your computer and use it in GitHub Desktop.
Save anirudhpillai/81dd5604bc0ae7f7613932a4c462cc2f to your computer and use it in GitHub Desktop.
from functools import lru_cache
from typing import List
import math
import random
class RandomGen:
"""
Class for a random generator that selects a number from a given list.
The probability of selecting a number is passed in as a separate list.
Attributes:
:_random_nums Values that may be returned by next_num()
:_probabilities Probability of the occurence of random_nums
:_csum_of_prob Cumulative sum of probabilities
"""
def __init__(self, random_nums: List[int], probabilities: List[int]):
# compare to floating point precision
if not math.isclose(sum(probabilities), 1):
raise ValueError("probabilities don't add up to 1")
if len(random_nums) != len(probabilities):
raise ValueError("probabilities not provided for all random nums")
if len(random_nums) != len(set(random_nums)):
raise ValueError("random_nums can't contain duplicates")
self._random_nums = random_nums
self._probabilities = probabilities
self._csum_of_prob: List[int] = []
csum = 0
for prob in probabilities:
csum += prob
self._csum_of_prob.append(csum)
def next_num(self) -> int:
"""
O(log(n)) operation to get a random number.
Returns one of the _random_nums. When this method is called
multiple times over a long period, it should return the
numbers roughly with the initialized probabilities.
Works by using a cumulative sum of the probabilities.
Then each index in cumulative sum array becomes a 'bucket' for
the random number to land in. So if the random number is in the
range [_csum_of_prob[i-1], _csum_of_prob[i]) then we return
self._random_nums[i] as the random number fell in the bucket
for index i. (if i == 0 then _csum_of_prob[i-1] = 0)
"""
rnum = random.random()
# we must use bisect_right and not bisect_left
# as random.random returns [0, 1) where 1 isn't included
# so in the cumulative sum, of [0.5, 0.7, 1]
# if rnum == 0.7, the index returned should be 2 (not 1)
# as 0.7 will correspond to the third item, since random.random
# will never return 1 and the 'bucket' for the first item (index 0)
# is [0, 0.5) inclusive of 0
idx = self._bisect_right(rnum)
return self._random_nums[idx]
@lru_cache()
def _bisect_right(self, random_number: float):
"""
My implementation of Python's bisect.bisect_right method.
Apart for this assignment, I would use the bisect module as
because of it being written in cpython, it would be a lot
faster than my implementation.
Returns:
:int i such that all items in self._csum_of_prob[:i]
have value <= random_number, and all items in
self._csum_of_prob[i:] have value > random_number.
Have also used lru_cache in this method as I assume RandomGen
would be initialised once and then will get multiple next_num
calls, so in case random.random() returns the same, number it
will be quicker to find the bisection index, changing this O(log(n))
operation into a O(n) operation.
The efficiency of using the cache will depend on how often
random.random() is likely to return the same number. If it isn't
very common then the cache will just increase memory consumption :)
"""
lo, hi = 0, len(self._csum_of_prob)
while lo < hi:
mid = (lo + hi) // 2
if random_number < self._csum_of_prob[mid]:
hi = mid
else:
lo = mid + 1
return lo
def next_num_o_of_n(self) -> int:
"""
O(n) solution
"""
rnum = random.random()
for i, prob in enumerate(self._probabilities):
rnum -= prob
if rnum < 0:
return self._random_nums[i]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment