Created
March 19, 2023 08:46
-
-
Save rgov/849b073e9ea59724356f689a540731c7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
I asked ChatGPT (GPT-4) to write some Z3 code that tries to generate a string | |
that roughly matches the letter frequency distribution of normal English text. | |
It got the implementation _very_ close on the first try. However, Z3 is not able | |
to make progress on the problem. | |
''' | |
from z3 import * | |
s = Optimize() | |
s.set(timeout=5000) # Give up after a few seconds | |
# Fixed letter frequencies of standard English text | |
fixed_freqs = [0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, | |
0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, | |
0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, | |
0.00978, 0.0236, 0.0015, 0.01974, 0.00074] | |
n = 100 # Length of the array | |
variables = [Int('x_%i' % i) for i in range(n)] | |
# Define the constraints for the symbolic integer variables | |
constraints = [And(0 <= v, v <= 25) for v in variables] | |
s.add(constraints) | |
# Count the occurrences of each value in the array | |
counts = [Int('count_%i' % i) for i in range(26)] | |
for i in range(26): | |
s.add(counts[i] == Sum([If(v == i, 1, 0) for v in variables])) | |
# Compute the relative frequencies | |
freqs = [Real('freq_%i' % i) for i in range(26)] | |
for i in range(26): | |
s.add(freqs[i] == counts[i] / n) | |
# Calculate the squared differences between the computed and fixed frequencies | |
squared_diffs = [Real('sq_diff_%i' % i) for i in range(26)] | |
for i in range(26): | |
s.add(squared_diffs[i] == (freqs[i] - fixed_freqs[i])**2) | |
# Calculate the RMS error | |
rms_error = Real('rms_error') | |
s.add(rms_error == Sum(squared_diffs)) # not bothering with sqrt(x / 26) | |
# Define the optimization problem to minimize the error term | |
s.minimize(rms_error) | |
# Solve the optimization problem | |
if s.check() == sat: | |
model = s.model() | |
print('Minimum RMS Error:', model[rms_error].as_decimal(3)) | |
print('String:', | |
''.join(chr(ord('A') + model[v].as_long()) for v in variables)) | |
for i, (c, f, d) in enumerate(zip(counts, freqs, squared_diffs)): | |
print( | |
chr(ord('A') + i), | |
model[c].as_long(), | |
model[f].as_decimal(3), | |
model[d].as_decimal(3), | |
) | |
else: | |
print('No solution found') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Some modified approaches:
Try to minimize each term of
rms_error
instead of its sum. Like this post. Didn't really work.Iteratively minimize by adding
rms_error < model[rms_error]
each step. Like this post. Kind of works but the solutions aren't very impressive.The exponentiation of
(freqs[i] - fixed_freqs[I])**2
is a big part of why this breaks down; multiplication of variables takes the problem out of the domain of the linear optimizer.