Skip to content

Instantly share code, notes, and snippets.

@jfbu
Last active May 5, 2018 20:08
Show Gist options
  • Save jfbu/b14e09d7e8bcc16c394fd257799b0a0c to your computer and use it in GitHub Desktop.
Save jfbu/b14e09d7e8bcc16c394fd257799b0a0c to your computer and use it in GitHub Desktop.
Python code demonstrating biases in the parity bits of the PDFTeX RNG
#!/bin/env/py
"""This is a quick Pythonification of the Pascal code in pdftex.web
I know I should use a Python generator rather than variables with
global scope...
Anyway, brief testing showed it does match exactly with actual
\pdfuniformedeviate/\pdfsetrandomseed, and that's what I wanted
in case I attempt some statistics on this PRNG.
May 2, 2018. JF B.
Added test_walk May 5, 2018.
Conclusion: in successive runs of 165 \pdfuniformdeviate
(starting with the 55th one after \pdfsetrandomseed)
it is more frequent that there are more odd numbers than even numbers.
"""
fraction_one = 2**28
fraction_half = 2**27
randoms = [0] * 55
j_random = 0
def next_random():
global j_random
if j_random==0:
new_randoms()
else:
j_random -=1
def new_randoms():
global j_random
global randoms
for k in range(24):
randoms[k] = (randoms[k] - randoms[k+31]) % fraction_one
for k in range(24, 55):
randoms[k] = (randoms[k] - randoms[k-24]) % fraction_one
j_random = 54
def init_randoms(seed):
global randoms
j = abs(seed)
while j >= fraction_one:
j //= 2
k = 1
for i in range(55):
jj = k
k = (j - k) % fraction_one
j = jj
randoms[(i * 21) % 55] = j
new_randoms()
new_randoms()
new_randoms()
def unif_rand(x):
global j_random
global randoms
next_random()
# here we must round, but Python // truncates, so we are in exact
# opposite as when we want to truncate in a \numexpr which rounds ;-)
y = ( abs(x) * randoms[j_random] + fraction_half ) // fraction_one
if y == abs(x):
return 0
else:
if x > 0:
return y
else:
return -y
# all the above is quite direct Pythonification of code from pdftex.web
# it definitely should be using a generator ("yield") to be more Pythonesque...
# now let's start using it
# def test(seed, N, reps):
# """Create a stream of random integers which we can compare with similar
# one from tex code using \pdfuniformdeviate N
# I did it with 1000 draws and seeds 12345 and 12346 and match was perfect.
# """
# init_randoms(seed)
# with open("randomints_py_" + str(seed) + ".txt", "w") as f:
# for k in range(reps):
# f.write(str(unif_rand(N)) + '\n')
from scipy.stats import chisquare
def test1(seed, reps):
"""Just for checking things do work as expected.
Helped me understand the shift by 1 at start."""
global j_random
max_rand = 2**28
print("seed = %s" % seed)
L = [0] * 110
init_randoms(seed)
print(j_random)
for k in range(54):
x = unif_rand(max_rand)
print(j_random)
for i in range(55):
L[54-i] = unif_rand(max_rand)
for i in range(55):
L[55 + 54 - i] = unif_rand(max_rand)
occ = {0: 0, 1: 0}
for k in range(reps):
L[:55] = L[55:]
for i in range(109, 54, -1):
L[i] = unif_rand(max_rand)
for i in range(55, 110):
y = L[i] - L[i - 55] + L[i - 24]
x = y % 2
# if y != 0:
# print(reps, i, y)
occ[x] +=1
print(occ)
def test2(seed, R):
"""Checking I go recurrences right.
"""
print(seed)
max_rand = 2**28
init_randoms(seed)
L = [0] # when we start j_random is 54 which means morally
# that an array item is already consumed, first random will be
# the one at randoms[53]
# so we put a dummy value in Oth position of L
# first random will be put in position 1 etc...
for k in range(1, R+1):
L.append(unif_rand(max_rand))
# This is recursion using only past
# We check Z is zero modulo 2**28 always
for n in range(110, R+1-24):
u = n % 55
if u < 7:
Z = L[n] - (L[n-7] - L[n-31]- L[n-38] + L[n-55])
elif u < 31:
Z = L[n] - (-L[n-31] + L[n-55] + L[n-62])
else:
Z = L[n] - (L[n-55] - L[n-86])
# never happens...
if Z % max_rand:
print("error", n)
return None
def test_walk(seed, reps, M):
""" I initially was planning to mostly use it with 55*79
but already 165 = 55*3 is interesting.
ATTENTION MUST BE USED WITH M ODD
"""
# anyway as we work mod 2, only two seeds: 0 and 1 !
print("With seed %s and runs of %s trials" % (seed, M))
max_rand = 2**28
init_randoms(seed)
# shift to start
# this way batches of 55 randoms correspond to 55 values of the Fibonacci lagged sequence
print("skipping first 54")
for k in range(54):
x = unif_rand(max_rand)
L = []
D = {0:0, 1:0}
for n in range(0, reps):
# check how many 1s among the M first parity bits
S = 0
for k in range(M):
x = ((unif_rand(max_rand) & 1) << 1) - 1 # +1 if odd, -1 if even
S += x
# S = N_odd - N_even (N_odd + N_even = M is supposed odd, so no tie.)
if S > 0:
D[1] += 1
else:
D[0] += 1
if n % 100 == 0:
print("observed probability of more 1s after %s batches of %s is %s" %
(n+1, M, D[1]/(D[0]+D[1])))
print(D)
print(D)
print("observed probability of more 1s after %s batches of %s is %s" %
(reps, M, D[1]/(D[0]+D[1])))
return chisquare(list(D.values()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment