# evanthebouncy/rectangle_lm.py

## rectangle_lm.py
from rectangle import is_inside, is_correct, inside, outside, W
import random
import string

# To make the point that this is a "language model", every program here is
# written as a STRING; call eval(prog) on it to get the actual program.

def writer1():
    """Return a 9-character string of random ``string.printable`` characters.

    Characters are drawn independently; no attempt is made to produce a
    well-formed program.
    """
    alphabet = string.printable
    chars = [random.choice(alphabet) for _ in range(9)]
    return ''.join(chars)

def writer2():
    """Return a random program string of the form ``'[U,D,L,R]'``.

    The four entries are drawn independently from ``0..W`` (both ends
    included), so nothing relates U to D or L to R.
    """
    U, D, L, R = (random.randint(0, W) for _ in range(4))
    return '[{},{},{},{}]'.format(U, D, L, R)

def writer3():
    """Return a random program string ``'[U,D,L,R]'`` with ``U < D`` and ``L < R``.

    U and L are drawn from ``0..W-2``; D and R are obtained by adding an
    extent of at least 1, chosen so that neither exceeds ``W``.
    """
    top = random.randint(0, W - 2)
    left = random.randint(0, W - 2)
    # Extents are drawn after both start edges, height first.
    bottom = top + random.randint(1, W - top)
    right = left + random.randint(1, W - left)
    return '[{},{},{},{}]'.format(top, bottom, left, right)

# random input generation
def sample_input():
    """Return a random coordinate pair, each component drawn from ``0..W-1``."""
    first = random.randint(0, W - 1)
    second = random.randint(0, W - 1)
    return first, second

# random spec generation
def sample_spec(prog):
    """Sample a random spec for the program string ``prog``.

    Between 1 and 20 coordinates are drawn with ``sample_input`` and each is
    labelled with ``is_inside(eval(prog), coordinate)``.

    Returns:
        A list of ``(coordinate, label)`` pairs.
    """
    how_many = random.randint(1, 20)
    points = []
    for _ in range(how_many):
        points.append(sample_input())
    # Programs are stored as strings by design; eval turns one into a value.
    rect = eval(prog)
    return [(point, is_inside(rect, point)) for point in points]

# generate a dataset
def sample_D(n_samples):
    """Generate ``n_samples`` random ``(prog, spec)`` pairs.

    Each program comes from ``writer3`` and its spec from ``sample_spec``.
    """
    def draw_pair():
        program = writer3()
        return program, sample_spec(program)

    return [draw_pair() for _ in range(n_samples)]

import numpy as np

# convert spec to a bit-vector
def spec_to_bitvec(spec):
    """Encode a spec as a flat vector of length ``W * W * 2``.

    For every ``(coord, label)`` pair, the entry at
    ``[coord[0], coord[1], 1]`` is set to 1 when the label is truthy and the
    entry at ``[coord[0], coord[1], 0]`` when it is falsy. Coordinates that
    do not appear in the spec leave both entries at 0.
    """
    grid = np.zeros((W, W, 2))
    for point, label in spec:
        channel = int(bool(label))
        grid[point[0], point[1], channel] = 1
    return grid.flatten()


import sklearn.linear_model
# train the unigram distribution
def train_unigram(D):
    """Fit one logistic-regression classifier per rectangle coordinate.

    Args:
        D: iterable of ``(prog, spec)`` pairs, where ``eval(prog)`` yields
            exactly four values ``T, D, L, R``.

    Returns:
        ``(model_T, model_D, model_L, model_R)``; each model predicts its
        own coordinate from the bit-vector encoding of the spec,
        independently of the other three.
    """
    features = []
    tops, bottoms, lefts, rights = [], [], [], []
    for prog, spec in D:
        top, bottom, left, right = eval(prog)
        features.append(spec_to_bitvec(spec))
        tops.append(top)
        bottoms.append(bottom)
        lefts.append(left)
        rights.append(right)

    X = np.array(features)
    fitted = []
    # Fit in T, D, L, R order so the returned tuple lines up with the names.
    for column in (tops, bottoms, lefts, rights):
        clf = sklearn.linear_model.LogisticRegression()
        clf.fit(X, np.array(column))
        fitted.append(clf)
    return tuple(fitted)

def get_writer4(model_T, model_D, model_L, model_R):
    """Build a spec-conditioned writer from four per-coordinate classifiers.

    Args:
        model_T, model_D, model_L, model_R: fitted classifiers exposing
            ``predict_proba`` (and, as scikit-learn classifiers do,
            ``classes_``), one per rectangle coordinate.

    Returns:
        ``writer4(spec)``, which samples each coordinate independently from
        its model's predicted distribution and returns the program string
        ``'[T,D,L,R]'``.
    """
    def writer4(spec):
        features = [spec_to_bitvec(spec)]
        picks = []
        for model in (model_T, model_D, model_L, model_R):
            probs = model.predict_proba(features)[0]
            # predict_proba columns are ordered like model.classes_, which
            # need not start at 0 or be contiguous (D and R labels start at
            # 1), so sample the class label rather than the column index.
            labels = getattr(model, 'classes_', None)
            if labels is None:
                # Duck-typed model without classes_: fall back to indices.
                labels = np.arange(len(probs))
            picks.append(np.random.choice(labels, p=probs))
        return '[{},{},{},{}]'.format(*picks)
    return writer4


def manual_writer(spec):
    """Propose a program string using the inside points of ``spec``.

    With no truthy-labelled coordinate in the spec, falls back to
    ``writer3()``. Otherwise T and D are each picked at random from the
    first components of the inside coordinates, and L and R from the second
    components. The result is the ``repr`` of the list ``[T, D, L, R]``.
    """
    hits = [point for point, label in spec if label]
    if not hits:
        # Nothing to go on: fall back to an uninformed guess.
        return writer3()

    rows = [point[0] for point in hits]
    cols = [point[1] for point in hits]
    top = random.choice(rows)
    bottom = random.choice(rows)
    left = random.choice(cols)
    right = random.choice(cols)
    return repr([top, bottom, left, right])

# a synthesizer that returns both a working program
# and the number of samples it took to find it
def get_synthesizer(writer, checker, budget):
    """Build a sample-and-check synthesizer.

    Args:
        writer: callable ``writer(spec)`` returning a program string.
        checker: callable ``checker(prog, spec)`` returning truthy when the
            evaluated program satisfies the spec.
        budget: maximum number of programs to sample per spec.

    Returns:
        ``synthesizer(spec)``, which returns ``(attempts, prog)`` for the
        first accepted program (``1 <= attempts <= budget``), or
        ``(budget + 1, None)`` when no sample within the budget passes.
    """
    def synthesizer(spec):
        # budget + 1 so that exactly `budget` attempts are made.
        for attempt in range(1, budget + 1):
            try:
                # NOTE: eval executes whatever the writer produced; only use
                # this with writers whose output you are willing to run.
                prog = eval(writer(spec))
                # Unpacking doubles as a shape check: exactly four values.
                T, D, L, R = prog
            except Exception:
                # Ill-formed candidates count as a used attempt. Exception
                # (not a bare except) lets Ctrl-C still interrupt the search.
                continue
            if checker(prog, spec):
                return attempt, prog
        return budget + 1, None
    return synthesizer

if __name__ == '__main__':
    # Show one sample from each spec-independent writer.
    print(writer1())
    print(len(string.printable))
    print(writer2())
    print(writer3())

    # A hand-written rectangle and a random spec consistent with it.
    r_prog = "[1,3,1,4]"
    r_spec = sample_spec(r_prog)
    print(r_prog, r_spec)

    D = sample_D(5000)
    print(D[442])

    print(spec_to_bitvec(r_spec))

    # Fit the per-coordinate models and try the learned writer on r_spec.
    writer4 = get_writer4(*train_unigram(D))
    synth4 = get_synthesizer(writer4, is_correct, 1000)
    print(synth4(r_spec))

    # Compare all the synthesizers to each other on fresh data.
    D_train = sample_D(5000)
    D_test = sample_D(1000)

    writers = [
        lambda spec: writer1(),
        lambda spec: writer2(),
        lambda spec: writer3(),
        get_writer4(*train_unigram(D_train)),
        manual_writer,
    ]
    synthesizers = [get_synthesizer(w, is_correct, 100) for w in writers]

    # to_plot[k] collects, per test spec, how many samples synthesizer k used.
    to_plot = [[] for _ in synthesizers]
    for _, spec in D_test:
        for bucket, synth in zip(to_plot, synthesizers):
            samples_needed, _prog = synth(spec)
            bucket.append(samples_needed)
    print(to_plot)

    # plot_data[k][b] is the fraction of test specs synthesizer k solved
    # using at most b samples.
    budgets = range(100)
    plot_data = [
        [sum([n <= budget for n in counts]) / len(counts) for budget in budgets]
        for counts in to_plot
    ]
    print(plot_data)

    import matplotlib.pyplot as plt

    labels = [
        'writer_all_strings',
        'writer_dsl',
        'writer_dsl_tight',
        'writer_unigram',
        'manual',
    ]
    for curve, label in zip(plot_data, labels):
        plt.plot(budgets, curve, label=label)
    plt.xscale('log')
    plt.ylabel('fraction of tasks solved')
    plt.xlabel('budget given (log)')
    plt.legend()
    plt.show()
	from rectangle import is_inside, is_correct, inside, outside, W
	import random
	import string

	# for the purpose of showing this is a "language model", all programs here are
	# written as STRINGS, you need to call eval(prog) on them to get the actual program

	def writer1():
		"""Return a 9-character string of random ``string.printable`` characters.

		Characters are drawn independently; no attempt is made to produce a
		well-formed program.
		"""
		alphabet = string.printable
		chars = [random.choice(alphabet) for _ in range(9)]
		return ''.join(chars)

	def writer2():
		"""Return a random program string of the form ``'[U,D,L,R]'``.

		The four entries are drawn independently from ``0..W`` (both ends
		included), so nothing relates U to D or L to R.
		"""
		U, D, L, R = (random.randint(0, W) for _ in range(4))
		return '[{},{},{},{}]'.format(U, D, L, R)

	def writer3():
		"""Return a random program string ``'[U,D,L,R]'`` with ``U < D`` and ``L < R``.

		U and L are drawn from ``0..W-2``; D and R are obtained by adding an
		extent of at least 1, chosen so that neither exceeds ``W``.
		"""
		top = random.randint(0, W - 2)
		left = random.randint(0, W - 2)
		# Extents are drawn after both start edges, height first.
		bottom = top + random.randint(1, W - top)
		right = left + random.randint(1, W - left)
		return '[{},{},{},{}]'.format(top, bottom, left, right)

	# random input generation
	def sample_input():
		"""Return a random coordinate pair, each component drawn from ``0..W-1``."""
		first = random.randint(0, W - 1)
		second = random.randint(0, W - 1)
		return first, second

	# random spec generation
	def sample_spec(prog):
		"""Sample a random spec for the program string ``prog``.

		Between 1 and 20 coordinates are drawn with ``sample_input`` and each
		is labelled with ``is_inside(eval(prog), coordinate)``.

		Returns:
			A list of ``(coordinate, label)`` pairs.
		"""
		how_many = random.randint(1, 20)
		points = [sample_input() for _ in range(how_many)]
		# Programs are stored as strings by design; eval turns one into a value.
		rect = eval(prog)
		return [(point, is_inside(rect, point)) for point in points]

	# generate a dataset
	def sample_D(n_samples):
		"""Generate ``n_samples`` random ``(prog, spec)`` pairs.

		Each program comes from ``writer3`` and its spec from ``sample_spec``.
		"""
		pairs = []
		for _ in range(n_samples):
			program = writer3()
			pairs.append((program, sample_spec(program)))
		return pairs

	import numpy as np

	# convert spec to a bit-vector
	def spec_to_bitvec(spec):
		"""Encode a spec as a flat vector of length ``W * W * 2``.

		For every ``(coord, label)`` pair, the entry at
		``[coord[0], coord[1], 1]`` is set to 1 when the label is truthy and
		the entry at ``[coord[0], coord[1], 0]`` when it is falsy. Coordinates
		that do not appear in the spec leave both entries at 0.
		"""
		grid = np.zeros((W, W, 2))
		for point, label in spec:
			channel = 1 if label else 0
			grid[point[0], point[1], channel] = 1
		return grid.flatten()


	import sklearn.linear_model
	# train the unigram distribution
	def train_unigram(D):
		"""Fit one logistic-regression classifier per rectangle coordinate.

		Args:
			D: iterable of ``(prog, spec)`` pairs, where ``eval(prog)`` yields
				exactly four values ``T, D, L, R``.

		Returns:
			``(model_T, model_D, model_L, model_R)``; each model predicts its
			own coordinate from the bit-vector encoding of the spec,
			independently of the other three.
		"""
		features = []
		tops, bottoms, lefts, rights = [], [], [], []
		for prog, spec in D:
			# Distinct local names so the dataset parameter D is not rebound.
			top, bottom, left, right = eval(prog)
			features.append(spec_to_bitvec(spec))
			tops.append(top)
			bottoms.append(bottom)
			lefts.append(left)
			rights.append(right)

		X = np.array(features)
		fitted = []
		# Fit in T, D, L, R order so the returned tuple lines up with the names.
		for column in (tops, bottoms, lefts, rights):
			clf = sklearn.linear_model.LogisticRegression()
			clf.fit(X, np.array(column))
			fitted.append(clf)
		return tuple(fitted)

	def get_writer4(model_T, model_D, model_L, model_R):
		"""Build a spec-conditioned writer from four per-coordinate classifiers.

		Args:
			model_T, model_D, model_L, model_R: fitted classifiers exposing
				``predict_proba`` (and, as scikit-learn classifiers do,
				``classes_``), one per rectangle coordinate.

		Returns:
			``writer4(spec)``, which samples each coordinate independently
			from its model's predicted distribution and returns the program
			string ``'[T,D,L,R]'``.
		"""
		def writer4(spec):
			features = [spec_to_bitvec(spec)]
			picks = []
			for model in (model_T, model_D, model_L, model_R):
				probs = model.predict_proba(features)[0]
				# predict_proba columns are ordered like model.classes_, which
				# need not start at 0 or be contiguous (D and R labels start
				# at 1), so sample the class label, not the column index.
				labels = getattr(model, 'classes_', None)
				if labels is None:
					# Duck-typed model without classes_: fall back to indices.
					labels = np.arange(len(probs))
				picks.append(np.random.choice(labels, p=probs))
			return '[{},{},{},{}]'.format(*picks)
		return writer4


	def manual_writer(spec):
		"""Propose a program string using the inside points of ``spec``.

		With no truthy-labelled coordinate in the spec, falls back to
		``writer3()``. Otherwise T and D are each picked at random from the
		first components of the inside coordinates, and L and R from the
		second components. The result is the ``repr`` of ``[T, D, L, R]``.
		"""
		hits = [point for point, label in spec if label]
		if not hits:
			# Nothing to go on: fall back to an uninformed guess.
			return writer3()

		rows = [point[0] for point in hits]
		cols = [point[1] for point in hits]
		top = random.choice(rows)
		bottom = random.choice(rows)
		left = random.choice(cols)
		right = random.choice(cols)
		return repr([top, bottom, left, right])

	# a synthesizer that returns both a working program
	# and the number of samples it took to find it
	def get_synthesizer(writer, checker, budget):
		"""Build a sample-and-check synthesizer.

		Args:
			writer: callable ``writer(spec)`` returning a program string.
			checker: callable ``checker(prog, spec)`` returning truthy when
				the evaluated program satisfies the spec.
			budget: maximum number of programs to sample per spec.

		Returns:
			``synthesizer(spec)``, which returns ``(attempts, prog)`` for the
			first accepted program (``1 <= attempts <= budget``), or
			``(budget + 1, None)`` when no sample within the budget passes.
		"""
		def synthesizer(spec):
			# budget + 1 so that exactly `budget` attempts are made.
			for attempt in range(1, budget + 1):
				try:
					# NOTE: eval executes whatever the writer produced; only
					# use this with writers whose output you are willing to run.
					prog = eval(writer(spec))
					# Unpacking doubles as a shape check: exactly four values.
					T, D, L, R = prog
				except Exception:
					# Ill-formed candidates count as a used attempt. Exception
					# (not a bare except) lets Ctrl-C interrupt the search.
					continue
				if checker(prog, spec):
					return attempt, prog
			return budget + 1, None
		return synthesizer

	if __name__ == '__main__':
		# Show one sample from each spec-independent writer.
		print(writer1())
		print(len(string.printable))
		print(writer2())
		print(writer3())

		# A hand-written rectangle and a random spec consistent with it.
		r_prog = "[1,3,1,4]"
		r_spec = sample_spec(r_prog)
		print(r_prog, r_spec)

		D = sample_D(5000)
		print(D[442])

		spec_bitvec = spec_to_bitvec(r_spec)
		print(spec_bitvec)

		# Fit the per-coordinate models and try the learned writer on r_spec.
		model_T, model_D, model_L, model_R = train_unigram(D)
		writer4 = get_writer4(model_T, model_D, model_L, model_R)

		synth4 = get_synthesizer(writer4, is_correct, 1000)
		print(synth4(r_spec))

		# Compare all the synthesizers to each other on fresh data.
		D_train = sample_D(5000)
		D_test = sample_D(1000)

		synthesizers = [
			get_synthesizer(lambda spec: writer1(), is_correct, 100),
			get_synthesizer(lambda spec: writer2(), is_correct, 100),
			get_synthesizer(lambda spec: writer3(), is_correct, 100),
			get_synthesizer(get_writer4(*train_unigram(D_train)), is_correct, 100),
			get_synthesizer(manual_writer, is_correct, 100),
		]

		# to_plot[k] collects, per test spec, how many samples synthesizer k used.
		to_plot = [[] for _ in synthesizers]
		for _, spec in D_test:
			for synth_id, synth in enumerate(synthesizers):
				samples_needed, prog = synth(spec)
				to_plot[synth_id].append(samples_needed)
		print(to_plot)

		# plot_data[k][b] is the fraction of test specs synthesizer k solved
		# using at most b samples.
		plot_data = [[] for _ in synthesizers]
		for budget in range(100):
			for synth_id, counts in enumerate(to_plot):
				frac_solved = sum([samples_needed <= budget for samples_needed in counts]) / len(counts)
				plot_data[synth_id].append(frac_solved)
		print(plot_data)

		import matplotlib.pyplot as plt

		labels = [
			'writer_all_strings',
			'writer_dsl',
			'writer_dsl_tight',
			'writer_unigram',
			'manual',
		]
		for curve, label in zip(plot_data, labels):
			plt.plot(range(100), curve, label=label)
		# put x axis on the log scale
		plt.xscale('log')
		plt.ylabel('fraction of tasks solved')
		plt.xlabel('budget given (log)')
		plt.legend()
		plt.show()