outofmbufs/nistrandomtests.py

## nistrandomtests.py
import math
import random
import functools
import itertools
from collections import Counter

try:
    from scipy.special import gammaincc
except ModuleNotFoundError:
    def gammaincc(x1, x2):
        raise TypeError("Requires gammaincc; scipy was not found")

# NIST Special Publication 800-22:
#
#    https://csrc.nist.gov/publications/detail/sp/800-22/rev-1a/final
#
# defines statistical tests for randomness. This is an implementation of
# selected tests, done simply as an exercise. For any "real" purpose,
# there are better/faster/more-complete implementations; use them.
# Two known ones in python are:
#    https://github.com/InsaneMonster/NistRng    # pip install nistrng
#    https://github.com/stevenang/randomness_testsuite
#


def streambits(stream, /, *, width, n=None):
    """Generate a sequence of 1's and 0's extracted from a stream of ints.

    stream: iterable generating integer values.
     width: the (implied) width in bits of each element of the stream.
         n: if specified, a limit on how many stream elements to consume.

    NOTE: bits generated LSb to MSb from each element in stream.
    """

    masks = [1 << i for i in range(width)]
    for i in itertools.islice(stream, n):
        yield from (1 if (i & m) else 0 for m in masks)


# this decorator just handles automatically calculating n if it
# was not given. NOTE: of course, not giving n only works if the
# iterable being used is a sequence (i.e., has a len())
def auto_n(f):
    @functools.wraps(f)
    def _wrapped(it, *args, n=None, **kwargs):
        if n is None:
            n = len(it)
        return f(it, *args, n=n, **kwargs)
    return _wrapped


@auto_n
def frequency(it, /, *, n):
    """Perform Monobit test from NIST 800-22 on a sequence of 0's and 1's.

    Section 2.1 in the NIST paper.

    Returns a p value as defined in NIST 800-22, with p < 0.01 considered
    as indicating the bits have failed the test.

    n: length of bits; if defaulted len(bits) will be used.
    """
    sn = sum((-1, 1)[i] for i in itertools.islice(it, n))
    s_obs = abs(sn)/math.sqrt(n)
    return math.erfc(s_obs/math.sqrt(2))


@auto_n
def blocks(it, /, *, n):
    """Batch data into tuples of length n. Throws away last if short."""
    while len(batch := tuple(itertools.islice(it, n))) == n:
        yield batch


@auto_n
def blockfrequency(it, M, /, *, n):
    """Perform test from NIST 800-22 / 2.2 on an iterable of 0's and 1's.

    Returns a p value, with p < 0.01 considered indicating non-random data.

    M: variable name chosen to match 800-22; size of subblocks to test.
    n: total number of bits to use from it.
    """

    # Using bx[i] for what NIST doc calls pi[i]
    bx = [sum(t) / M for t in blocks(itertools.islice(it, n), n=M)]
    chi2obs = 4 * M * sum((bi - 0.5) * (bi - 0.5) for bi in bx)
    return gammaincc(len(bx) / 2, chi2obs / 2)


@auto_n
def runs(it, /, *, n):
    """Perform test 2.3 from NIST 800-22."""

    # this test requires going through the values more than once so...
    e = list(itertools.islice(it, n))

    px = sum(e) / n          # 800-22 calls this Pi

    # per 800-22, must pass this precondition:
    if abs(px - 0.5) >= (2 / math.sqrt(n)):
        return 0.0

    vn_obs = sum(0 if e[k] == e[k+1] else 1 for k in range(n-1)) + 1

    pxx = px * (1 - px)
    return math.erfc(abs(vn_obs - (2 * n * pxx)) /
                     (2 * math.sqrt(2 * n) * pxx))


@auto_n
def longestrunofones(it, /, *, n):
    """Perform test 2.4 from NIST 800-22."""

    # As it says in section 3.4 of the NIST 800-22 paper:
    #  """Cases K=3, M=8; K=5, M=128; and K=6, M=10000
    #     are currently embedded in the test suite"""
    # This code reflects those choices and the values precomputed
    # for px[i] ("pi-sub-i" in the paper) were copy/pasted.

    if n >= 750000:
        M = 10000
        vmin = 10
        vmax = 16
        capN = 75
        px = [0.0882, 0.2092, 0.2483, 0.1933, 0.1208, 0.0675, 0.0727]
    elif n >= 6272:
        M = 128
        vmin = 4
        vmax = 9
        capN = 49
        px = [0.1174, 0.2430, 0.2493, 0.1752, 0.1027, 0.1124]
    else:
        M = 8
        vmin = 1
        vmax = 4
        capN = 16
        px = [0.2148, 0.3672, 0.2305, 0.1875]

    # algorithm from 2.4.4 in 800-22:
    #    (1) Divide the sequence into M-bit blocks ('blocks()')
    #    (1b) but note that only take capN of them regardless of n.
    #    (2) tabulate frequences of longest runs of runs (see doc for details)
    c = Counter()
    for t in blocks(itertools.islice(it, capN*M), n=M):
        # find the longest run of 1's in this tuple
        longest = 0
        runlen = 0
        for v in itertools.chain(t, (0,)):      # extra 0 closes any last run
            if v == 1:
                runlen += 1
            else:
                longest = max(runlen, longest)
                runlen = 0
        c[min(max(vmin, longest), vmax)] += 1

    #    (3) compute chi-squared-observed
    chi2obs = sum(((c[vmin+i] - (capN*pxi))**2) / (capN * pxi)
                  for i, pxi in enumerate(px))

    return gammaincc((len(px) - 1) / 2, chi2obs / 2)


def badgen(ef, /, *, width=32, badval=0):
    """A 'bad' random number generator.

    Returns a fixed value (badval) one out of every ef calls.
    Otherwise returns a random number of bit-width 'width'
    """

    rrn = 1 << width
    while True:
        for _ in range(ef-1):
            yield random.randrange(rrn)
        yield badval


if __name__ == "__main__":
    import unittest

    class TestMethods(unittest.TestCase):
        # test data from NIST 800-22
        # DO NOT CHANGE; the expected_p values all depend on this data
        NIST100 = [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
                   1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
                   1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
                   0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
                   0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
                   0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0]

        NIST128 = [1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
                   0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
                   1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
                   0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
                   0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
                   1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
                   1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,
                   1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0]

        @staticmethod
        def equalNIST(x1, x2):
            # compare within the 6-digits given in the NIST 800-22 paper
            return round(x1, 6) == round(x2, 6)

        def testfrequency(self):
            expected_p = 0.109599     # from 800-22
            p = frequency(self.NIST100)
            with self.subTest(p=p, expected_p=expected_p):
                self.assertTrue(self.equalNIST(expected_p, p))

        def testblockfreq(self):
            expected_p = 0.706438     # from 800-22
            p = blockfrequency(self.NIST100, 10)
            with self.subTest(p=p, expected_p=expected_p):
                self.assertTrue(self.equalNIST(expected_p, p))

        def testruns(self):
            expected_p = 0.500798     # from 800-22
            p = runs(self.NIST100)
            with self.subTest(p=p, expected_p=expected_p):
                self.assertTrue(self.equalNIST(expected_p, p))

        def testlongestrun1(self):
            expected_p = 0.180598     # from 800-22
            p = longestrunofones(self.NIST128)   # note 128 bits not 100
            with self.subTest(p=p, expected_p=expected_p):
                self.assertTrue(self.equalNIST(expected_p, p))

        def testbad(self):
            # Make some bad sequences from the NIST sequences, by forcing
            # every other entry to be zero. Some of those might already be
            # zero of course. NOTE - there is no guarantee that this
            # sufficiently "corrupts" a good sequence to be bad for all
            # possible tests. But empirically it works.
            BAD128 = [a * b
                      for a, b in zip(itertools.cycle((1, 0)), self.NIST128)]
            BADseq = BAD128[:100]

            badplim = 0.01
            self.assertTrue(frequency(BADseq) < badplim)
            self.assertTrue(blockfrequency(BADseq, 10) < badplim)
            self.assertTrue(runs(BADseq) < badplim)
            self.assertTrue(longestrunofones(BAD128) < badplim)

    unittest.main()
	import math
	import random
	import functools
	import itertools
	from collections import Counter

	try:
	from scipy.special import gammaincc
	except ModuleNotFoundError:
	def gammaincc(x1, x2):
	raise TypeError("Requires gammaincc; scipy was not found")

	# NIST Special Publication 800-22:
	#
	# https://csrc.nist.gov/publications/detail/sp/800-22/rev-1a/final
	#
	# defines statistical tests for randomness. This is an implementation of
	# selected tests, done simply as an exercise. For any "real" purpose,
	# there are better/faster/more-complete implementations; use them.
	# Two known ones in python are:
	# https://github.com/InsaneMonster/NistRng # pip install nistrng
	# https://github.com/stevenang/randomness_testsuite
	#


	def streambits(stream, /, *, width, n=None):
	"""Generate a sequence of 1's and 0's extracted from a stream of ints.

	stream: iterable generating integer values.
	width: the (implied) width in bits of each element of the stream.
	n: if specified, a limit on how many stream elements to consume.

	NOTE: bits generated LSb to MSb from each element in stream.
	"""

	masks = [1 << i for i in range(width)]
	for i in itertools.islice(stream, n):
	yield from (1 if (i & m) else 0 for m in masks)


	# this decorator just handles automatically calculating n if it
	# was not given. NOTE: of course, not giving n only works if the
	# iterable being used is a sequence (i.e., has a len())
	def auto_n(f):
	@functools.wraps(f)
	def _wrapped(it, args, n=None, *kwargs):
	if n is None:
	n = len(it)
	return f(it, args, n=n, *kwargs)
	return _wrapped


	@auto_n
	def frequency(it, /, *, n):
	"""Perform Monobit test from NIST 800-22 on a sequence of 0's and 1's.

	Section 2.1 in the NIST paper.

	Returns a p value as defined in NIST 800-22, with p < 0.01 considered
	as indicating the bits have failed the test.

	n: length of bits; if defaulted len(bits) will be used.
	"""
	sn = sum((-1, 1)[i] for i in itertools.islice(it, n))
	s_obs = abs(sn)/math.sqrt(n)
	return math.erfc(s_obs/math.sqrt(2))


	@auto_n
	def blocks(it, /, *, n):
	"""Batch data into tuples of length n. Throws away last if short."""
	while len(batch := tuple(itertools.islice(it, n))) == n:
	yield batch


	@auto_n
	def blockfrequency(it, M, /, *, n):
	"""Perform test from NIST 800-22 / 2.2 on an iterable of 0's and 1's.

	Returns a p value, with p < 0.01 considered indicating non-random data.

	M: variable name chosen to match 800-22; size of subblocks to test.
	n: total number of bits to use from it.
	"""

	# Using bx[i] for what NIST doc calls pi[i]
	bx = [sum(t) / M for t in blocks(itertools.islice(it, n), n=M)]
	chi2obs = 4 * M * sum((bi - 0.5) * (bi - 0.5) for bi in bx)
	return gammaincc(len(bx) / 2, chi2obs / 2)


	@auto_n
	def runs(it, /, *, n):
	"""Perform test 2.3 from NIST 800-22."""

	# this test requires going through the values more than once so...
	e = list(itertools.islice(it, n))

	px = sum(e) / n # 800-22 calls this Pi

	# per 800-22, must pass this precondition:
	if abs(px - 0.5) >= (2 / math.sqrt(n)):
	return 0.0

	vn_obs = sum(0 if e[k] == e[k+1] else 1 for k in range(n-1)) + 1

	pxx = px * (1 - px)
	return math.erfc(abs(vn_obs - (2 * n * pxx)) /
	(2 * math.sqrt(2 * n) * pxx))


	@auto_n
	def longestrunofones(it, /, *, n):
	"""Perform test 2.4 from NIST 800-22."""

	# As it says in section 3.4 of the NIST 800-22 paper:
	# """Cases K=3, M=8; K=5, M=128; and K=6, M=10000
	# are currently embedded in the test suite"""
	# This code reflects those choices and the values precomputed
	# for px[i] ("pi-sub-i" in the paper) were copy/pasted.

	if n >= 750000:
	M = 10000
	vmin = 10
	vmax = 16
	capN = 75
	px = [0.0882, 0.2092, 0.2483, 0.1933, 0.1208, 0.0675, 0.0727]
	elif n >= 6272:
	M = 128
	vmin = 4
	vmax = 9
	capN = 49
	px = [0.1174, 0.2430, 0.2493, 0.1752, 0.1027, 0.1124]
	else:
	M = 8
	vmin = 1
	vmax = 4
	capN = 16
	px = [0.2148, 0.3672, 0.2305, 0.1875]

	# algorithm from 2.4.4 in 800-22:
	# (1) Divide the sequence into M-bit blocks ('blocks()')
	# (1b) but note that only take capN of them regardless of n.
	# (2) tabulate frequences of longest runs of runs (see doc for details)
	c = Counter()
	for t in blocks(itertools.islice(it, capN*M), n=M):
	# find the longest run of 1's in this tuple
	longest = 0
	runlen = 0
	for v in itertools.chain(t, (0,)): # extra 0 closes any last run
	if v == 1:
	runlen += 1
	else:
	longest = max(runlen, longest)
	runlen = 0
	c[min(max(vmin, longest), vmax)] += 1

	# (3) compute chi-squared-observed
	chi2obs = sum(((c[vmin+i] - (capNpxi))2) / (capN pxi)
	for i, pxi in enumerate(px))

	return gammaincc((len(px) - 1) / 2, chi2obs / 2)


	def badgen(ef, /, *, width=32, badval=0):
	"""A 'bad' random number generator.

	Returns a fixed value (badval) one out of every ef calls.
	Otherwise returns a random number of bit-width 'width'
	"""

	rrn = 1 << width
	while True:
	for _ in range(ef-1):
	yield random.randrange(rrn)
	yield badval


	if __name__ == "__main__":
	import unittest

	class TestMethods(unittest.TestCase):
	# test data from NIST 800-22
	# DO NOT CHANGE; the expected_p values all depend on this data
	NIST100 = [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
	1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
	0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
	0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
	0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0]

	NIST128 = [1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
	0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
	1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
	0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
	0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
	1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0,
	1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0]

	@staticmethod
	def equalNIST(x1, x2):
	# compare within the 6-digits given in the NIST 800-22 paper
	return round(x1, 6) == round(x2, 6)

	def testfrequency(self):
	expected_p = 0.109599 # from 800-22
	p = frequency(self.NIST100)
	with self.subTest(p=p, expected_p=expected_p):
	self.assertTrue(self.equalNIST(expected_p, p))

	def testblockfreq(self):
	expected_p = 0.706438 # from 800-22
	p = blockfrequency(self.NIST100, 10)
	with self.subTest(p=p, expected_p=expected_p):
	self.assertTrue(self.equalNIST(expected_p, p))

	def testruns(self):
	expected_p = 0.500798 # from 800-22
	p = runs(self.NIST100)
	with self.subTest(p=p, expected_p=expected_p):
	self.assertTrue(self.equalNIST(expected_p, p))

	def testlongestrun1(self):
	expected_p = 0.180598 # from 800-22
	p = longestrunofones(self.NIST128) # note 128 bits not 100
	with self.subTest(p=p, expected_p=expected_p):
	self.assertTrue(self.equalNIST(expected_p, p))

	def testbad(self):
	# Make some bad sequences from the NIST sequences, by forcing
	# every other entry to be zero. Some of those might already be
	# zero of course. NOTE - there is no guarantee that this
	# sufficiently "corrupts" a good sequence to be bad for all
	# possible tests. But empirically it works.
	BAD128 = [a * b
	for a, b in zip(itertools.cycle((1, 0)), self.NIST128)]
	BADseq = BAD128[:100]

	badplim = 0.01
	self.assertTrue(frequency(BADseq) < badplim)
	self.assertTrue(blockfrequency(BADseq, 10) < badplim)
	self.assertTrue(runs(BADseq) < badplim)
	self.assertTrue(longestrunofones(BAD128) < badplim)

	unittest.main()