Skip to content

Instantly share code, notes, and snippets.

@greed2411
Created July 5, 2021 09:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greed2411/c0b3a3be130683c0e2c6c2e06ba09c03 to your computer and use it in GitHub Desktop.
Save greed2411/c0b3a3be130683c0e2c6c2e06ba09c03 to your computer and use it in GitHub Desktop.
Want to generate sorted random numbers optimally? Here you go.
"""
Want to generate sorted random numbers optimally? Here you go.
Traditionally if you wanted a list of sorted random numbers,
you generate `n` random numbers and then sort them. which at worst
case has:
Time Complexity : O(n log n) # for generating & sorting.
Space Complexity: O(n) # for storing all of them in array/set.
These python implementations take up:
Time Complexity : O(n)
Space Complexity: O(1) # online generation
where `n` is the number of random numbers you want.
Performance runs are mentioned below in the end.
The implementations are derived from this 1980 paper,
@article{bentley1980generating,
title={Generating sorted lists of random numbers},
author={Bentley, Jon Louis and Saxe, James B},
journal={ACM Transactions on Mathematical Software (TOMS)},
volume={6},
number={3},
pages={359--364},
year={1980},
publisher={ACM New York, NY, USA}
}
https://kilthub.cmu.edu/articles/journal_contribution/Generating_sorted_lists_of_random_numbers/6605957/1
scihub:
https://sci-hub.se/https://doi.org/10.1145/355900.355907
Useful links:
http://www.augustana.ualberta.ca/~mohrj/algorithms/randpick.html
https://stackoverflow.com/questions/1866031/generating-sorted-random-ints-without-the-sort-on
https://github.com/brentp/sorted-random-go
https://docs.python.org/3/library/random.html#real-valued-distributions
https://github.com/python/cpython/blob/main/Lib/random.py#L128
IMPORTANT CAVEAT: because of `round` and `int`, the uniquely generated floats while getting converted
to int are not unique anymore. try max: 263 and asking n: 100.
"""
import math
import random
from typing import Generator
def bentley_saxe_sorted_randint_sampler(n: int, maximum_number: int, reverse: bool = False) -> Generator:
"""
n: the amount of numbers you want to sample.
maximum_number: every number generated must be less than or equal to this number.
usual generated range: [0, maximum_number).
reverse: just like `sorted()`, pass a bool if you want the list to be in ascending (default)
or descending order.
derived implementation of Program 4 mentioned in Bentley and Saxe (1980)
Note: might not be all unique if `n` is signifcant compared against `maximum_number`.
"""
i = n
lncurmax = 0.0
while i > 0:
rand_float = random.uniform(0.0, 1.0)
lncurmax += (math.log(rand_float)/i)
i -= 1
if reverse:
gen_float = math.exp(lncurmax)
else:
gen_float = 1 - math.exp(lncurmax)
yield int(gen_float * maximum_number)
def brentp_sampler(a: int, b: int, n: int, reverse: bool = False) -> Generator:
"""
a: the minimum number in the generated range [a, b].
b: the maximum number in the generated range [a, b].
n: the amount of numbers you want to sample.
reverse: just like `sorted()`, pass a bool if you want the list to be in ascending (default)
or descending order.
python implementation of what brentp tried.
https://github.com/brentp/sorted-random-go/blob/master/sortedrandom.go
Note: might not be all unique if `n` is signifcant compared against the range [a, b],
limited by unix nano seconds and float to integer conversion.
"""
i = n
curmax = 1.0
span = b - a + 1
while i > 0:
rand_float = random.random()
curmax *= math.pow(rand_float, 1.0/i)
i -= 1
if reverse:
gen_float = curmax
else:
gen_float = 1 - curmax
yield a + int(gen_float * span)
if __name__ == "__main__":
# usage:
sorted_rand_sampler = bentley_saxe_sorted_randint_sampler(100, 263)
for number in sorted_rand_sampler:
print(number)
sorted_rand_sampler = brentp_sampler(0, 263, 100)
for number in sorted_rand_sampler:
print(number)
#### IGNORE FROM HERE ####
### PERFORMANCE RUNS ###
N = 1_234_567
UPPER_RANGE = 99_999_999
TRIALS = 100
import timeit
import numpy as np
bentley_saxe_timing = timeit.timeit(lambda: list(bentley_saxe_sorted_randint_sampler(N, UPPER_RANGE)), number=TRIALS)
brentp_timing = timeit.timeit(lambda: list(brentp_sampler(0, UPPER_RANGE, N)), number=TRIALS)
ord_sort_rand = timeit.timeit(lambda: list(random.sample(range(UPPER_RANGE), N)).sort(), number=TRIALS)
np_sort_rand = timeit.timeit(lambda: np.random.uniform(0, UPPER_RANGE, N).sort(), number=TRIALS)
print(f"time in seconds, while running {TRIALS = }, sampling {N:_} numbers from range [0, {UPPER_RANGE:_}]")
print(f"{bentley_saxe_timing = }")
print(f"{brentp_timing = }")
print(f"{ord_sort_rand = }")
print(f"{np_sort_rand = }")
### OUTPUT ###
# time in seconds, while running TRIALS = 100, sampling 1_234_567 numbers from range [0, 99_999_999]
# bentley_saxe_timing = 68.24 seconds # 3rd place
# brentp_timing = 46.45 seconds # 2nd place
# ord_sort_rand = 106.62 seconds # 4th place
# np_sort_rand = 9.32 seconds # 1st place
# therefore brentp & faithful bentley saxe algorithm python implementations
# are faster than standard cpython implementation of sorting and random
# by 2.29x and 1.56x times respectively.
# numpy is much faster than everything of course.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment