anirudhpillai/next_num.py

## next_num.py
from functools import lru_cache
from typing import List

import math
import random


class RandomGen:
    """
    Class for a random generator that selects a number from a given list.
    The probability of selecting a number is passed in as a separate list.

    Attributes:
        :_random_nums  Values that may be returned by next_num()
        :_probabilities Probability of the occurence of random_nums
        :_csum_of_prob Cumulative sum of probabilities
    """

    def __init__(self, random_nums: List[int], probabilities: List[int]):
        # compare to floating point precision
        if not math.isclose(sum(probabilities), 1):
            raise ValueError("probabilities don't add up to 1")

        if len(random_nums) != len(probabilities):
            raise ValueError("probabilities not provided for all random nums")

        if len(random_nums) != len(set(random_nums)):
            raise ValueError("random_nums can't contain duplicates")

        self._random_nums = random_nums
        self._probabilities = probabilities
        self._csum_of_prob: List[int] = []

        csum = 0
        for prob in probabilities:
            csum += prob
            self._csum_of_prob.append(csum)

    def next_num(self) -> int:
        """
        O(log(n)) operation to get a random number.

        Returns one of the _random_nums. When this method is called
        multiple times over a long period, it should return the
        numbers roughly with the initialized probabilities.

        Works by using a cumulative sum of the probabilities.
        Then each index in cumulative sum array becomes a 'bucket' for
        the random number to land in. So if the random number is in the
        range [_csum_of_prob[i-1], _csum_of_prob[i]) then we return
        self._random_nums[i] as the random number fell in the bucket
        for index i.  (if i == 0 then _csum_of_prob[i-1] = 0)
        """
        rnum = random.random()

        # we must use bisect_right and not bisect_left
        # as random.random returns [0, 1) where 1 isn't included
        # so in the cumulative sum, of [0.5, 0.7, 1]
        # if rnum == 0.7, the index returned should be 2 (not 1)
        # as 0.7 will correspond to the third item, since random.random
        # will never return 1 and the 'bucket' for the first item (index 0)
        # is [0, 0.5) inclusive of 0
        idx = self._bisect_right(rnum)
        return self._random_nums[idx]

    @lru_cache()
    def _bisect_right(self, random_number: float):
        """
        My implementation of Python's bisect.bisect_right method.
        Apart for this assignment, I would use the bisect module as
        because of it being written in cpython, it would be a lot
        faster than my implementation.

        Returns:
            :int i such that all items in self._csum_of_prob[:i]
                have value <= random_number, and all items in
                self._csum_of_prob[i:] have value > random_number.

        Have also used lru_cache in this method as I assume RandomGen
        would be initialised once and then will get multiple next_num
        calls, so in case random.random() returns the same, number it
        will be quicker to find the bisection index, changing this O(log(n))
        operation into a O(n) operation.

        The efficiency of using the cache will depend on how often
        random.random() is likely to return the same number. If it isn't
        very common then the cache will just increase memory consumption :)
        """
        lo, hi = 0, len(self._csum_of_prob)
        while lo < hi:
            mid = (lo + hi) // 2
            if random_number < self._csum_of_prob[mid]:
                hi = mid
            else:
                lo = mid + 1

        return lo

    def next_num_o_of_n(self) -> int:
        """
        O(n) solution
        """
        rnum = random.random()

        for i, prob in enumerate(self._probabilities):
            rnum -= prob
            if rnum < 0:
                return self._random_nums[i]
	from functools import lru_cache
	from typing import List

	import math
	import random


	class RandomGen:
	"""
	Class for a random generator that selects a number from a given list.
	The probability of selecting a number is passed in as a separate list.

	Attributes:
	:_random_nums Values that may be returned by next_num()
	:_probabilities Probability of the occurence of random_nums
	:_csum_of_prob Cumulative sum of probabilities
	"""

	def __init__(self, random_nums: List[int], probabilities: List[int]):
	# compare to floating point precision
	if not math.isclose(sum(probabilities), 1):
	raise ValueError("probabilities don't add up to 1")

	if len(random_nums) != len(probabilities):
	raise ValueError("probabilities not provided for all random nums")

	if len(random_nums) != len(set(random_nums)):
	raise ValueError("random_nums can't contain duplicates")

	self._random_nums = random_nums
	self._probabilities = probabilities
	self._csum_of_prob: List[int] = []

	csum = 0
	for prob in probabilities:
	csum += prob
	self._csum_of_prob.append(csum)

	def next_num(self) -> int:
	"""
	O(log(n)) operation to get a random number.

	Returns one of the _random_nums. When this method is called
	multiple times over a long period, it should return the
	numbers roughly with the initialized probabilities.

	Works by using a cumulative sum of the probabilities.
	Then each index in cumulative sum array becomes a 'bucket' for
	the random number to land in. So if the random number is in the
	range [_csum_of_prob[i-1], _csum_of_prob[i]) then we return
	self._random_nums[i] as the random number fell in the bucket
	for index i. (if i == 0 then _csum_of_prob[i-1] = 0)
	"""
	rnum = random.random()

	# we must use bisect_right and not bisect_left
	# as random.random returns [0, 1) where 1 isn't included
	# so in the cumulative sum, of [0.5, 0.7, 1]
	# if rnum == 0.7, the index returned should be 2 (not 1)
	# as 0.7 will correspond to the third item, since random.random
	# will never return 1 and the 'bucket' for the first item (index 0)
	# is [0, 0.5) inclusive of 0
	idx = self._bisect_right(rnum)
	return self._random_nums[idx]

	@lru_cache()
	def _bisect_right(self, random_number: float):
	"""
	My implementation of Python's bisect.bisect_right method.
	Apart for this assignment, I would use the bisect module as
	because of it being written in cpython, it would be a lot
	faster than my implementation.

	Returns:
	:int i such that all items in self._csum_of_prob[:i]
	have value <= random_number, and all items in
	self._csum_of_prob[i:] have value > random_number.

	Have also used lru_cache in this method as I assume RandomGen
	would be initialised once and then will get multiple next_num
	calls, so in case random.random() returns the same, number it
	will be quicker to find the bisection index, changing this O(log(n))
	operation into a O(n) operation.

	The efficiency of using the cache will depend on how often
	random.random() is likely to return the same number. If it isn't
	very common then the cache will just increase memory consumption :)
	"""
	lo, hi = 0, len(self._csum_of_prob)
	while lo < hi:
	mid = (lo + hi) // 2
	if random_number < self._csum_of_prob[mid]:
	hi = mid
	else:
	lo = mid + 1

	return lo

	def next_num_o_of_n(self) -> int:
	"""
	O(n) solution
	"""
	rnum = random.random()

	for i, prob in enumerate(self._probabilities):
	rnum -= prob
	if rnum < 0:
	return self._random_nums[i]