Created
August 25, 2015 16:03
-
-
Save StuartGordonReid/d366855e481f717b94be to your computer and use it in GitHub Desktop.
Python implementation of the Universal cryptographic test for randomness
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def universal(self, bin_data: str): | |
""" | |
Note that this description is taken from the NIST documentation [1] | |
[1] http://csrc.nist.gov/publications/nistpubs/800-22-rev1a/SP800-22rev1a.pdf | |
The focus of this test is the number of bits between matching patterns (a measure that is related to the | |
length of a compressed sequence). The purpose of the test is to detect whether or not the sequence can be | |
significantly compressed without loss of information. A significantly compressible sequence is considered | |
to be non-random. **This test is always skipped because the requirements on the lengths of the binary | |
strings are too high i.e. there have not been enough trading days to meet the requirements. | |
:param bin_data: a binary string | |
:return: the p-value from the test | |
""" | |
# The below table is less relevant for us traders and markets than it is for security people | |
n = len(bin_data) | |
pattern_size = 5 | |
if n >= 387840: | |
pattern_size = 6 | |
if n >= 904960: | |
pattern_size = 7 | |
if n >= 2068480: | |
pattern_size = 8 | |
if n >= 4654080: | |
pattern_size = 9 | |
if n >= 10342400: | |
pattern_size = 10 | |
if n >= 22753280: | |
pattern_size = 11 | |
if n >= 49643520: | |
pattern_size = 12 | |
if n >= 107560960: | |
pattern_size = 13 | |
if n >= 231669760: | |
pattern_size = 14 | |
if n >= 496435200: | |
pattern_size = 15 | |
if n >= 1059061760: | |
pattern_size = 16 | |
if 5 < pattern_size < 16: | |
# Create the biggest binary string of length pattern_size | |
ones = "" | |
for i in range(pattern_size): | |
ones += "1" | |
# How long the state list should be | |
num_ints = int(ones, 2) | |
vobs = numpy.zeros(num_ints + 1) | |
# Keeps track of the blocks, and whether were are initializing or summing | |
num_blocks = math.floor(n / pattern_size) | |
init_bits = 10 * pow(2, pattern_size) | |
test_bits = num_blocks - init_bits | |
# These are the expected values assuming randomness (uniform) | |
c = 0.7 - 0.8 / pattern_size + (4 + 32 / pattern_size) * pow(test_bits, -3 / pattern_size) / 15 | |
variance = [0, 0, 0, 0, 0, 0, 2.954, 3.125, 3.238, 3.311, 3.356, 3.384, 3.401, 3.410, 3.416, 3.419, 3.421] | |
expected = [0, 0, 0, 0, 0, 0, 5.2177052, 6.1962507, 7.1836656, 8.1764248, 9.1723243, | |
10.170032, 11.168765, 12.168070, 13.167693, 14.167488, 15.167379] | |
sigma = c * math.sqrt(variance[pattern_size] / test_bits) | |
cumsum = 0.0 | |
for i in range(num_blocks): | |
block_start = i * pattern_size | |
block_end = block_start + pattern_size | |
block_data = bin_data[block_start: block_end] | |
# Work out what state we are in | |
int_rep = int(block_data, 2) | |
# Initialize the state list | |
if i < init_bits: | |
vobs[int_rep] = i + 1 | |
else: | |
initial = vobs[int_rep] | |
vobs[int_rep] = i + 1 | |
cumsum += math.log(i - initial + 1, 2) | |
# Calculate the statistic | |
phi = float(cumsum / test_bits) | |
stat = abs(phi - expected[pattern_size]) / (float(math.sqrt(2)) * sigma) | |
p_val = spc.erfc(stat) | |
return p_val | |
else: | |
return -1.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment