Skip to content

Instantly share code, notes, and snippets.

@evanthebouncy
Last active November 15, 2022 13:48
Show Gist options
  • Save evanthebouncy/1703d3e9aee71ba9124405fdb30bd967 to your computer and use it in GitHub Desktop.
Save evanthebouncy/1703d3e9aee71ba9124405fdb30bd967 to your computer and use it in GitHub Desktop.
language modeling for rectangles
from rectangle import is_inside, is_correct, inside, outside, W
import random
import string
# for the purpose of showing this is a "language model", all programs here are
# written as STRINGS, you need to call eval(prog) on them to get the actual program
def writer1():
    """Return a random 9-character string drawn from ``string.printable``.

    This is the least informed program writer: it emits arbitrary printable
    text, almost none of which will be a valid program once ``eval``-ed.
    """
    chars = []
    for _ in range(9):
        chars.append(random.choice(string.printable))
    return ''.join(chars)
def writer2():
    """Return a program string ``'[U,D,L,R]'`` of four independent random ints.

    Each value is drawn uniformly from ``0..W`` (inclusive) with no ordering
    constraint between them, so the result is always a well-formed 4-element
    list literal but not necessarily a sensible rectangle.
    """
    # Draw order is U, D, L, R, matching the order they appear in the string.
    params = [random.randint(0, W) for _ in range(4)]
    return '[{},{},{},{}]'.format(*params)
def writer3():
    """Return a program string ``'[U,D,L,R]'`` with ``U < D <= W`` and ``L < R <= W``.

    The first bound of each pair is drawn from ``0..W-2``; the second is the
    first plus a positive extent chosen so that it never exceeds ``W``.
    """
    # Both starting bounds are drawn before either extent.
    up = random.randint(0, W - 2)
    left = random.randint(0, W - 2)
    # Extent is at least 1, so the second bound is strictly greater.
    down = up + random.randint(1, W - up)
    right = left + random.randint(1, W - left)
    return '[{},{},{},{}]'.format(up, down, left, right)
# random input generation
def sample_input():
    """Return a random coordinate pair, each component uniform over ``0..W-1``."""
    first = random.randint(0, W - 1)
    second = random.randint(0, W - 1)
    return first, second
# random spec generation
def sample_spec(prog):
    """Generate a random input/output specification for a program string.

    Args:
        prog: program source as a string; it is ``eval``-ed to obtain the
            program object that is handed to ``is_inside``.

    Returns:
        A list of ``(input, output)`` pairs, where each input comes from
        ``sample_input()`` and each output is ``is_inside(program, input)``.
        The list has between 1 and 20 entries.
    """
    # pick a number of inputs, up to 20 patches of mushroom / grass total
    how_many = random.randint(1, 20)
    points = []
    for _ in range(how_many):
        points.append(sample_input())
    # NOTE(review): eval() executes arbitrary code; only pass program strings
    # produced by the writers in this file, never untrusted text.
    program = eval(prog)
    return [(point, is_inside(program, point)) for point in points]
# generate a dataset
def sample_D(n_samples):
    """Build a dataset of ``n_samples`` random ``(program, spec)`` pairs.

    Each program string comes from ``writer3()`` and its spec from
    ``sample_spec`` applied to that same program.
    """
    def draw_pair():
        # The program is written first, then a spec is sampled for it.
        program = writer3()
        return program, sample_spec(program)

    return [draw_pair() for _ in range(n_samples)]
import numpy as np
# convert spec to a bit-vector
def spec_to_bitvec(spec):
    """Encode a spec as a flat 0/1 vector of length ``W * W * 2``.

    Args:
        spec: iterable of ``(coord, flag)`` pairs, where ``coord`` is indexable
            as ``coord[0]``, ``coord[1]`` and ``flag`` is interpreted by
            truthiness.

    Returns:
        A 1-D numpy array. Viewed as ``(W, W, 2)``, entry
        ``[coord[0], coord[1], 1]`` is set for truthy flags and
        ``[coord[0], coord[1], 0]`` for falsy ones; everything else is zero.
    """
    grid = np.zeros((W, W, 2))
    for point, flag in spec:
        # Last axis selects the label: 0 for a falsy flag, 1 for a truthy one.
        channel = int(bool(flag))
        grid[point[0], point[1], channel] = 1
    # flatten the grid into a 1D array
    return grid.flatten()
import sklearn.linear_model
# train the unigram distribution
def train_unigram(D):
    """Fit one independent classifier per rectangle parameter.

    Each program in the dataset is a string that evaluates to four values
    ``[T, D, L, R]``. For each of the four positions a separate
    ``LogisticRegression`` is trained to predict that value from the
    bit-vector encoding of the spec (see ``spec_to_bitvec``).

    Args:
        D: iterable of ``(prog, spec)`` pairs, e.g. as returned by
            ``sample_D``.

    Returns:
        A 4-tuple ``(model_T, model_D, model_L, model_R)`` of fitted models,
        in the same order as the parameters appear in a program.

    Raises:
        ValueError: if a program does not evaluate to exactly four values.
    """
    features = []
    # One target column per parameter, in program order: T, D, L, R.
    columns = ([], [], [], [])
    for prog, spec in D:
        # NOTE(review): eval() executes arbitrary code; programs must come
        # from a trusted writer.
        # Unpack into fresh names: the original unpacked into `D`, which
        # silently overwrote the dataset argument.
        top, down, left, right = eval(prog)
        features.append(spec_to_bitvec(spec))
        for column, value in zip(columns, (top, down, left, right)):
            column.append(value)

    # convert to numpy arrays
    features = np.array(features)
    models = []
    for column in columns:
        model = sklearn.linear_model.LogisticRegression()
        model.fit(features, np.array(column))
        models.append(model)
    return tuple(models)
def get_writer4(model_T, model_D, model_L, model_R):
    """Build a spec-conditioned program writer from four fitted classifiers.

    Args:
        model_T, model_D, model_L, model_R: classifiers exposing
            ``predict_proba`` (and normally ``classes_``), one per rectangle
            parameter, e.g. as returned by ``train_unigram``.

    Returns:
        A function ``writer4(spec)`` that samples each parameter independently
        from its model's predicted distribution and returns the program
        string ``'[T,D,L,R]'``.
    """
    models = (model_T, model_D, model_L, model_R)

    def sample_label(model, features):
        """Sample one class label from the model's predicted distribution."""
        probs = model.predict_proba([features])[0]
        # predict_proba columns are ordered like model.classes_, which holds
        # only the labels seen during training. Sampling a positional index
        # (as the code originally did) is wrong whenever the labels are not
        # exactly 0..k-1, e.g. a parameter whose smallest training value is 1.
        labels = getattr(model, 'classes_', None)
        if labels is None:
            # Fallback for model-like objects without classes_: keep the old
            # positional behaviour.
            labels = np.arange(len(probs))
        return np.random.choice(labels, p=probs)

    def writer4(spec):
        spec_bitvec = spec_to_bitvec(spec)
        # Sampled in program order T, D, L, R.
        samples = [sample_label(model, spec_bitvec) for model in models]
        return '[{},{},{},{}]'.format(*samples)

    return writer4
def manual_writer(spec):
    """Propose a program string using the truthy-labelled points of a spec.

    Args:
        spec: iterable of ``(coord, flag)`` pairs.

    Returns:
        If no pair has a truthy flag, the result of ``writer3()``. Otherwise
        ``repr([T, D, L, R])`` where ``T`` and ``D`` are randomly chosen from
        the ``coord[0]`` values of the truthy-labelled points and ``L`` and
        ``R`` from their ``coord[1]`` values.
    """
    # keep only the coordinates labelled as inside
    hits = [point for point, flag in spec if flag]
    if not hits:
        # nothing to go on: fall back to a random program
        return writer3()
    # otherwise, let the inside coordinates suggest the rectangle parameters
    rows = []
    cols = []
    for point in hits:
        rows.append(point[0])
        cols.append(point[1])
    # Draw order is T, D (from rows) and then L, R (from columns).
    top = random.choice(rows)
    down = random.choice(rows)
    left = random.choice(cols)
    right = random.choice(cols)
    return repr([top, down, left, right])
# a synthesizer that returns both a working program
# and the number of samples it took to find it
def get_synthesizer(writer, checker, budget):
    """Build a generate-and-check synthesizer with a bounded sample budget.

    Args:
        writer: callable ``writer(spec)`` returning a program as a string.
        checker: callable ``checker(prog, spec)`` returning a truthy value
            when the evaluated program satisfies the spec.
        budget: maximum number of programs to sample per spec.

    Returns:
        A function ``synthesizer(spec)`` that returns ``(i, prog)``, where
        ``i`` is the 1-based index of the first sample that passed the check
        and ``prog`` is the evaluated program. If none of the ``budget``
        samples pass, it returns ``(budget + 1, None)``.
    """
    def synthesizer(spec):
        # Attempts are numbered 1..budget inclusive. The original loop
        # stopped at budget - 1, wasting one sample while still reporting
        # budget + 1 on failure.
        for attempt in range(1, budget + 1):
            try:
                # NOTE(review): eval() runs whatever text the writer emits;
                # only use writers you trust.
                prog = eval(writer(spec))
                # A program must unpack into exactly four parameters.
                T, D, L, R = prog
            except (Exception, SystemExit):
                # Ill-formed samples just consume budget. SystemExit is
                # included because arbitrary program text may call exit();
                # KeyboardInterrupt is left alone so Ctrl-C still works.
                continue
            if checker(prog, spec):
                return (attempt, prog)
        return budget + 1, None

    return synthesizer
if __name__ == '__main__':
    # --- quick look at what each unconditional writer produces ---
    print(writer1())
    print(len(string.printable))
    print(writer2())
    print(writer3())

    # --- a fixed example program and a random spec for it ---
    demo_prog = "[1,3,1,4]"
    demo_spec = sample_spec(demo_prog)
    print(demo_prog, demo_spec)

    demo_data = sample_D(5000)
    print(demo_data[442])
    print(spec_to_bitvec(demo_spec))

    # --- train the per-parameter models and try them on the example spec ---
    demo_models = train_unigram(demo_data)
    demo_synth = get_synthesizer(get_writer4(*demo_models), is_correct, 1000)
    print(demo_synth(demo_spec))

    # --- let's compare all the synthesizers to each other ---
    train_set = sample_D(5000)
    test_set = sample_D(1000)
    max_budget = 100
    # Each synthesizer is kept next to the label used for it in the plot.
    contenders = [
        ('writer_all_strings',
         get_synthesizer(lambda spec: writer1(), is_correct, max_budget)),
        ('writer_dsl',
         get_synthesizer(lambda spec: writer2(), is_correct, max_budget)),
        ('writer_dsl_tight',
         get_synthesizer(lambda spec: writer3(), is_correct, max_budget)),
        ('writer_unigram',
         get_synthesizer(get_writer4(*train_unigram(train_set)), is_correct, max_budget)),
        ('manual',
         get_synthesizer(manual_writer, is_correct, max_budget)),
    ]

    # samples_by_synth[k] holds, per test spec, how many samples synthesizer
    # k needed.
    samples_by_synth = [[] for _ in contenders]
    for _, spec in test_set:
        for bucket, (_label, synth) in zip(samples_by_synth, contenders):
            needed, _prog = synth(spec)
            bucket.append(needed)
    print(samples_by_synth)

    # For every budget, the fraction of test specs solved within that budget.
    budgets = range(max_budget)
    curves = [
        [sum(needed <= cap for needed in bucket) / len(bucket) for cap in budgets]
        for bucket in samples_by_synth
    ]
    print(curves)

    import matplotlib.pyplot as plt
    for (label, _synth), curve in zip(contenders, curves):
        plt.plot(budgets, curve, label=label)
    # put x axis on the log scale
    plt.xscale('log')
    # put y axis label as "fraction of tasks solved"
    plt.ylabel('fraction of tasks solved')
    # put x axis label as "budget given"
    plt.xlabel('budget given (log)')
    plt.legend()
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment