Created
July 5, 2021 09:05
-
-
Save greed2411/c0b3a3be130683c0e2c6c2e06ba09c03 to your computer and use it in GitHub Desktop.
Want to generate sorted random numbers optimally? Here you go.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Want to generate sorted random numbers optimally? Here you go. | |
Traditionally if you wanted a list of sorted random numbers, | |
you generate `n` random numbers and then sort them. which at worst | |
case has: | |
Time Complexity : O(n log n) # for generating & sorting. | |
Space Complexity: O(n) # for storing all of them in array/set. | |
These python implementations take up: | |
Time Complexity : O(n) | |
Space Complexity: O(1) # online generation | |
where `n` is the number of random numbers you want. | |
Performance runs are mentioned below in the end. | |
The implementations are derived from this 1980 paper, | |
@article{bentley1980generating, | |
title={Generating sorted lists of random numbers}, | |
author={Bentley, Jon Louis and Saxe, James B}, | |
journal={ACM Transactions on Mathematical Software (TOMS)}, | |
volume={6}, | |
number={3}, | |
pages={359--364}, | |
year={1980}, | |
publisher={ACM New York, NY, USA} | |
} | |
https://kilthub.cmu.edu/articles/journal_contribution/Generating_sorted_lists_of_random_numbers/6605957/1 | |
scihub: | |
https://sci-hub.se/https://doi.org/10.1145/355900.355907 | |
Useful links: | |
http://www.augustana.ualberta.ca/~mohrj/algorithms/randpick.html | |
https://stackoverflow.com/questions/1866031/generating-sorted-random-ints-without-the-sort-on | |
https://github.com/brentp/sorted-random-go | |
https://docs.python.org/3/library/random.html#real-valued-distributions | |
https://github.com/python/cpython/blob/main/Lib/random.py#L128 | |
IMPORTANT CAVEAT: because of `round` and `int`, the uniquely generated floats while getting converted | |
to int are not unique anymore. try max: 263 and asking n: 100. | |
""" | |
import math | |
import random | |
from typing import Generator | |
def bentley_saxe_sorted_randint_sampler(n: int, maximum_number: int, reverse: bool = False) -> Generator: | |
""" | |
n: the amount of numbers you want to sample. | |
maximum_number: every number generated must be less than or equal to this number. | |
usual generated range: [0, maximum_number). | |
reverse: just like `sorted()`, pass a bool if you want the list to be in ascending (default) | |
or descending order. | |
derived implementation of Program 4 mentioned in Bentley and Saxe (1980) | |
Note: might not be all unique if `n` is signifcant compared against `maximum_number`. | |
""" | |
i = n | |
lncurmax = 0.0 | |
while i > 0: | |
rand_float = random.uniform(0.0, 1.0) | |
lncurmax += (math.log(rand_float)/i) | |
i -= 1 | |
if reverse: | |
gen_float = math.exp(lncurmax) | |
else: | |
gen_float = 1 - math.exp(lncurmax) | |
yield int(gen_float * maximum_number) | |
def brentp_sampler(a: int, b: int, n: int, reverse: bool = False) -> Generator: | |
""" | |
a: the minimum number in the generated range [a, b]. | |
b: the maximum number in the generated range [a, b]. | |
n: the amount of numbers you want to sample. | |
reverse: just like `sorted()`, pass a bool if you want the list to be in ascending (default) | |
or descending order. | |
python implementation of what brentp tried. | |
https://github.com/brentp/sorted-random-go/blob/master/sortedrandom.go | |
Note: might not be all unique if `n` is signifcant compared against the range [a, b], | |
limited by unix nano seconds and float to integer conversion. | |
""" | |
i = n | |
curmax = 1.0 | |
span = b - a + 1 | |
while i > 0: | |
rand_float = random.random() | |
curmax *= math.pow(rand_float, 1.0/i) | |
i -= 1 | |
if reverse: | |
gen_float = curmax | |
else: | |
gen_float = 1 - curmax | |
yield a + int(gen_float * span) | |
if __name__ == "__main__": | |
# usage: | |
sorted_rand_sampler = bentley_saxe_sorted_randint_sampler(100, 263) | |
for number in sorted_rand_sampler: | |
print(number) | |
sorted_rand_sampler = brentp_sampler(0, 263, 100) | |
for number in sorted_rand_sampler: | |
print(number) | |
#### IGNORE FROM HERE #### | |
### PERFORMANCE RUNS ### | |
N = 1_234_567 | |
UPPER_RANGE = 99_999_999 | |
TRIALS = 100 | |
import timeit | |
import numpy as np | |
bentley_saxe_timing = timeit.timeit(lambda: list(bentley_saxe_sorted_randint_sampler(N, UPPER_RANGE)), number=TRIALS) | |
brentp_timing = timeit.timeit(lambda: list(brentp_sampler(0, UPPER_RANGE, N)), number=TRIALS) | |
ord_sort_rand = timeit.timeit(lambda: list(random.sample(range(UPPER_RANGE), N)).sort(), number=TRIALS) | |
np_sort_rand = timeit.timeit(lambda: np.random.uniform(0, UPPER_RANGE, N).sort(), number=TRIALS) | |
print(f"time in seconds, while running {TRIALS = }, sampling {N:_} numbers from range [0, {UPPER_RANGE:_}]") | |
print(f"{bentley_saxe_timing = }") | |
print(f"{brentp_timing = }") | |
print(f"{ord_sort_rand = }") | |
print(f"{np_sort_rand = }") | |
### OUTPUT ### | |
# time in seconds, while running TRIALS = 100, sampling 1_234_567 numbers from range [0, 99_999_999] | |
# bentley_saxe_timing = 68.24 seconds # 3rd place | |
# brentp_timing = 46.45 seconds # 2nd place | |
# ord_sort_rand = 106.62 seconds # 4th place | |
# np_sort_rand = 9.32 seconds # 1st place | |
# therefore brentp & faithful bentley saxe algorithm python implementations | |
# are faster than standard cpython implementation of sorting and random | |
# by 2.29x and 1.56x times respectively. | |
# numpy is much faster than everything of course. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment