rgov/z3-optimize-letter-freqs.py

## z3-optimize-letter-freqs.py
#!/usr/bin/env python3
'''
I asked ChatGPT (GPT-4) to write some Z3 code that tries to generate a string
that roughly matches the letter frequency distribution of normal English text.

It got the implementation _very_ close on the first try. However, Z3 is not able
to make progress on the problem.
'''

from z3 import *


s = Optimize()
s.set(timeout=5000)  # Give up after a few seconds


# Fixed letter frequencies of standard English text
fixed_freqs = [0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
               0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
               0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
               0.00978, 0.0236, 0.0015, 0.01974, 0.00074]

n = 100  # Length of the array
variables = [Int('x_%i' % i) for i in range(n)]

# Define the constraints for the symbolic integer variables
constraints = [And(0 <= v, v <= 25) for v in variables]
s.add(constraints)

# Count the occurrences of each value in the array
counts = [Int('count_%i' % i) for i in range(26)]
for i in range(26):
    s.add(counts[i] == Sum([If(v == i, 1, 0) for v in variables]))

# Compute the relative frequencies
freqs = [Real('freq_%i' % i) for i in range(26)]
for i in range(26):
    s.add(freqs[i] == counts[i] / n)

# Calculate the squared differences between the computed and fixed frequencies
squared_diffs = [Real('sq_diff_%i' % i) for i in range(26)]
for i in range(26):
    s.add(squared_diffs[i] == (freqs[i] - fixed_freqs[i])**2)

# Calculate the RMS error
rms_error = Real('rms_error')
s.add(rms_error == Sum(squared_diffs))  # not bothering with sqrt(x / 26)

# Define the optimization problem to minimize the error term
s.minimize(rms_error)

# Solve the optimization problem
if s.check() == sat:
    model = s.model()
    print('Minimum RMS Error:', model[rms_error].as_decimal(3))
    print('String:',
          ''.join(chr(ord('A') + model[v].as_long()) for v in variables))
    for i, (c, f, d) in enumerate(zip(counts, freqs, squared_diffs)):
        print(
            chr(ord('A') + i),
            model[c].as_long(),
            model[f].as_decimal(3),
            model[d].as_decimal(3),
        )


else:
    print('No solution found')
	#!/usr/bin/env python3
	'''
	I asked ChatGPT (GPT-4) to write some Z3 code that tries to generate a string
	that roughly matches the letter frequency distribution of normal English text.

	It got the implementation _very_ close on the first try. However, Z3 is not able
	to make progress on the problem.
	'''

	from z3 import *


	s = Optimize()
	s.set(timeout=5000) # Give up after a few seconds


	# Fixed letter frequencies of standard English text
	fixed_freqs = [0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
	0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
	0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
	0.00978, 0.0236, 0.0015, 0.01974, 0.00074]

	n = 100 # Length of the array
	variables = [Int('x_%i' % i) for i in range(n)]

	# Define the constraints for the symbolic integer variables
	constraints = [And(0 <= v, v <= 25) for v in variables]
	s.add(constraints)

	# Count the occurrences of each value in the array
	counts = [Int('count_%i' % i) for i in range(26)]
	for i in range(26):
	s.add(counts[i] == Sum([If(v == i, 1, 0) for v in variables]))

	# Compute the relative frequencies
	freqs = [Real('freq_%i' % i) for i in range(26)]
	for i in range(26):
	s.add(freqs[i] == counts[i] / n)

	# Calculate the squared differences between the computed and fixed frequencies
	squared_diffs = [Real('sq_diff_%i' % i) for i in range(26)]
	for i in range(26):
	s.add(squared_diffs[i] == (freqs[i] - fixed_freqs[i])**2)

	# Calculate the RMS error
	rms_error = Real('rms_error')
	s.add(rms_error == Sum(squared_diffs)) # not bothering with sqrt(x / 26)

	# Define the optimization problem to minimize the error term
	s.minimize(rms_error)

	# Solve the optimization problem
	if s.check() == sat:
	model = s.model()
	print('Minimum RMS Error:', model[rms_error].as_decimal(3))
	print('String:',
	''.join(chr(ord('A') + model[v].as_long()) for v in variables))
	for i, (c, f, d) in enumerate(zip(counts, freqs, squared_diffs)):
	print(
	chr(ord('A') + i),
	model[c].as_long(),
	model[f].as_decimal(3),
	model[d].as_decimal(3),
	)


	else:
	print('No solution found')