@giuseppebonaccorso
Created May 20, 2017 15:00
Mathematical expression learning experiment using a Seq2Seq approach
Number of samples = 100000
Symbols = signed integers with absolute value below 100, arithmetic operators (+, -, /, *), brackets, empty space (for padding)
Keras backend = Theano 0.9.0
Training hardware = Core i7, GeForce 960, 32 GB RAM
Training time = 5.4 hours / 5 epochs
Test results (as expected, there are many errors given the limited size of the training dataset):
-50/-68 = 0
(-96*85) = -7820
-(-17--82) = -63
-16*5 = -74
48*-60 = -2840
(66+-19) = 43
69+41 = 116
(-16-26) = -44
17/-11 = -2
-20-11 = -33
5+60 = 63
-(-81+62) = 29
(-60/-89) = 0
(45+21) = 62
-(91-39) = -44
-68*-12 = 778
(-92+-7) = -97
-(35*-91) = 3175
-(13+-89) = 70
-(-5/-38) = 0
-(83+54) = -145
-61-44 = -107
65*-82 = -5470
-(-99/-64) = -2
(-88--78) = -1
-12*-94 = 904
-22*5 = -120
-91*-69 = 6227
-40/90 = -1
68/-83 = -1
-40+-89 = -137
-62--14 = -44
-87--72 = -17
(82*-35) = -3870
(-71*65) = -4085
(-51-66) = -117
-(18/-79) = 1
(-23*46) = -1162
-6*98 = -578
-(-32/-5) = -7
-18*-4 = 72
98/19 = 5
-5-68 = -61
-(99--13) = 110
(99--6) = 117
-(65/-91) = 1
-29/99 = -1
-13/-64 = 0
39/-1 = 47
-(-11*-13) = -17
(-51-26) = -77
-89-15 = -104
-(-81+9) = 62
-44/-46 = 1
(-51+96) = 43
88--42 = 138
-82*-43 = 3774
-85+56 = -23
(2/-16) = -1
-88+28 = -64
-(42+72) = -118
(7+-48) = -43
(78--41) = 127
24--35 = 53
(-4--99) = 97
(-32/-89) = 0
-62-84 = -148
78+90 = 174
(-76-83) = -157
(-80--45) = -27
-95/-21 = 4
9*56 = 484
(-25*70) = -1450
(-36-78) = -116
-84+-32 = -114
(-69-70) = -139
-(58*-53) = 3474
(4/97) = 0
-27*-70 = 1870
-(-8--12) = -12
(-34--20) = -18
-48+-75 = -123
-(-40*42) = 2840
-81/56 = -2
-97/24 = -4
(64/-90) = -1
-7*-99 = 627
-46*47 = -2208
-22*80 = -1440
26+46 = 68
(8+50) = 53
(12+-85) = -77
(78+-47) = 29
-(-80*-75) = -5600
-(-38+-29) = 63
(-25*-45) = 1155
(88+60) = 144
37+-71 = -34
83+-36 = 43
(-14*-84) = 114
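For context on the dimensions used in the script below: each character is one-hot encoded over the 17-symbol table, so a sequence of 340 values corresponds to 20 time steps of 17-dimensional vectors, and short expressions are right-padded with the space symbol. A minimal, self-contained sketch of this encoding (it only mirrors the script's own constants; the sample expression '69+41' is an arbitrary example):

import numpy as np
from sklearn.preprocessing import LabelBinarizer

symbols = [' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', '/', '*', '(', ')']
label_binarizer = LabelBinarizer().fit(symbols)

# '69+41' -> five 17-dimensional one-hot rows, padded to 20 rows with the space symbol
rows = label_binarizer.transform(list('69+41'))
padded = np.vstack([rows, label_binarizer.transform([' '] * 15)])
print(padded.shape)  # (20, 17): time_steps x symbol_length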
'''
Mathematical expression learning experiment
Giuseppe Bonaccorso (https://www.bonaccorso.eu)
Based on: http://machinelearningmastery.com/learn-add-numbers-seq2seq-recurrent-neural-networks/
'''
from __future__ import print_function

from keras.models import Sequential
from keras.layers import Dense, TimeDistributed, RepeatVector
from keras.layers.recurrent import LSTM
from sklearn.preprocessing import LabelBinarizer

import keras.backend as K
import numpy as np

# Set random seed (for reproducibility)
np.random.seed(1000)

# Mathematical symbols (index 0 is the padding symbol)
symbols = [' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', '/', '*', '(', ')']
operation_offset = 11
minus_symbol = 12
open_bracket = 15
closed_bracket = 16

# Number of training samples
nb_samples = 100000

# Sequence length(s)
input_sequence_length = 340
output_sequence_length = 340

# Binarize symbols (one-hot encoding over the 17-symbol table)
label_binarizer = LabelBinarizer()
label_binarizer.fit(symbols)

# Length of a single one-hot encoded symbol
symbol_length = len(label_binarizer.transform([symbols[0]])[0])

# Empty (padding) symbol
empty_symbol = label_binarizer.transform([symbols[0]])[0]

# Time steps (number of symbols per sequence)
time_steps = int(input_sequence_length / symbol_length)
def expression_to_symbols(value):
    # Encode each character of the expression as a one-hot vector
    s = []
    for digit in str(value):
        s.append(digit)
    return label_binarizer.transform(np.array(s)).flatten()


def symbols_to_expression(expression):
    # Decode a sequence of (possibly soft) one-hot rows back into a string
    syms = ''
    for row in expression:
        syms += label_binarizer.inverse_transform(to_binary(row).reshape((1, symbol_length)))[0]
    return syms.strip()


def operation(op_type, a, b):
    # 0: addition, 1: subtraction, 2: integer division, 3: multiplication
    ops = {
        0: a + b,
        1: a - b,
        2: int(a / b),
        3: a * b
    }
    return ops.get(op_type)
def generate_random_expression():
    # First term
    a = np.random.randint(-100, 100)

    # Second term (avoid zero for divisions)
    b = np.random.randint(1, 100)
    if binary_decision():
        b = -b

    # Operator
    op = np.random.randint(0, 4)
    result = operation(op, a, b)

    full_expression = (expression_to_symbols(a),
                       expression_to_symbols(symbols[op + operation_offset]),
                       expression_to_symbols(b))

    if binary_decision():
        # Insert brackets
        open_bracket_expression = (expression_to_symbols(symbols[open_bracket]),)

        if binary_decision():
            # Insert a minus in front of the expression
            open_bracket_expression = (expression_to_symbols(symbols[minus_symbol]),) + open_bracket_expression
            result *= -1

        full_expression = open_bracket_expression + full_expression
        full_expression += (expression_to_symbols(symbols[closed_bracket]),)

    x = pad(np.concatenate(full_expression), input_sequence_length).reshape(time_steps, symbol_length)
    r = pad(expression_to_symbols(result), output_sequence_length).reshape(time_steps, symbol_length)

    return x, r, result
def create_dataset(n_samples=5000):
    print('Creating dataset with %d samples' % n_samples)

    X = []
    Y = []

    for _ in range(n_samples):
        x, r, _ = generate_random_expression()
        X.append(x.astype(K.floatx()))
        Y.append(r.astype(K.floatx()))

    return np.array(X).astype(K.floatx()), np.array(Y).astype(K.floatx())


def binary_decision():
    # Fair coin flip
    return np.random.uniform(0, 1) < 0.5


def pad(x, sequence_length):
    # Right-pad the flattened one-hot sequence with empty (space) symbols
    if len(x) < sequence_length:
        n = int((sequence_length - len(x)) / len(empty_symbol))
        for _ in range(n):
            x = np.concatenate((x, empty_symbol))
    return x


def to_binary(x):
    # Harden a (softmax) output row into a one-hot vector
    v = np.argmax(x)
    z = np.zeros(shape=symbol_length)
    z[v] = 1.0
    return z
def make_expression(string_expression):
    s = []
    for digit in string_expression.strip():
        s.append(digit)
    return pad(label_binarizer.transform(np.array(s)).flatten(),
               input_sequence_length).reshape(1, time_steps, symbol_length)


def create_model():
    model = Sequential()
    model.add(LSTM(250, input_shape=(time_steps, symbol_length)))
    model.add(RepeatVector(time_steps))
    model.add(LSTM(100, return_sequences=True))
    model.add(TimeDistributed(Dense(symbol_length, activation='softmax')))

    # Compile model
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


if __name__ == '__main__':
    print('Expression learning experiment')

    print('Symbol table:')
    for symbol in symbols:
        print(symbol + ' -> ' + str(label_binarizer.transform([symbol])))

    # Create dataset
    print('Training model...')
    X, Y = create_dataset(n_samples=nb_samples)

    # Create model
    model = create_model()

    # Train model
    model.fit(X, Y, batch_size=1, epochs=5)

    # Test
    print('Test:')
    X_test, Y_test = create_dataset(n_samples=100)
    Y_pred = model.predict(X_test)

    for i, y in enumerate(Y_pred):
        print('%s = %s' % (symbols_to_expression(X_test[i]), symbols_to_expression(y)))
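Note that make_expression is defined but never called in the script; it appears intended for querying the trained model by hand. A sketch of such a check, assuming the script above has just finished training (the expression string is an arbitrary example):

# Hypothetical interactive check after training (not part of the original script)
x = make_expression('69+41')
y_pred = model.predict(x)[0]
print(symbols_to_expression(y_pred))  # should ideally print '110'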
@michael20at commented:
Hi, I trained your LSTM script (after writing an input method for it), and after training for two days I got accuracy up to 98%, which seemed fine. On testing I noticed that it works great with inputs in the range it was trained on (-99 to 99), with nearly no error, but produced totally wrong output for larger numbers (like 100*2)!

Any idea why? Is this inherent in the LSTM setup, i.e. is it only interpolating rather than learning the structure? I'll try to train it on bigger input numbers now; any idea if it is even possible to generalize? The goal should be for it to work for arbitrarily large numbers, so that it has really learned the rules of calculating, right? Thank you!
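(Editor's note: for anyone reproducing this, a quick way to probe the interpolation-vs-generalization question the comment raises, assuming the trained model and the helpers from the script above; the test expressions are arbitrary examples:)

# Hypothetical extrapolation probe (assumes the trained model from the gist)
for expr in ['99+1', '100*2', '250+250']:
    y_pred = model.predict(make_expression(expr))[0]
    print('%s = %s' % (expr, symbols_to_expression(y_pred)))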
