Create a gist now

Instantly share code, notes, and snippets.

Embed
Simple XOR learning with keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
# XOR truth table: the four possible input pairs and their expected outputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Network: 2 inputs -> 8 tanh hidden units -> 1 sigmoid output.
model = Sequential()
model.add(Dense(8, input_dim=2))
model.add(Activation('tanh'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

sgd = SGD(lr=0.1)
# Accuracy reporting is requested through `metrics` at compile time; the old
# `show_accuracy` argument of fit() is no longer accepted by Keras 2.
model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
# batch_size=1 performs one weight update per sample. `epochs` is the
# Keras 2 name of the former `nb_epoch` argument.
model.fit(X, y, batch_size=1, epochs=1000)
# predict() returns the raw sigmoid outputs, which is what predict_proba()
# used to return before it was removed from Keras.
print(model.predict(X))
# Output reported for one run (exact values vary with the random
# initialisation of the weights):
"""
[[ 0.0033028 ]
[ 0.99581173]
[ 0.99530098]
[ 0.00564186]]
"""
@finnbear

This comment has been minimized.

Show comment
Hide comment
@finnbear

finnbear Aug 18, 2017

Great, simple example. It can also be made to work with only two neurons in the hidden dense layer by training for more epochs. I tested this, and it reaches similar accuracy after 5000 epochs. If the outputs are rounded, only ~2000 epochs are needed.

Great, simple example. It can also be made to work with only two neurons in the hidden dense layer by training for more epochs. I tested this, and it reaches similar accuracy after 5000 epochs. If the outputs are rounded, only ~2000 epochs are needed.

@consciencia

This comment has been minimized.

Show comment
Hide comment
@consciencia

consciencia Sep 9, 2017

Try this code. The best result I was able to get from it is 22 epochs with a batch size of 4. So only 86 network evaluations!

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import Callback
from keras.initializers import VarianceScaling 
import numpy as np 


# Number of epochs that had been run when EarlyStoppingByLossVal stopped
# training; stays 0 if the callback never triggers.
lastEpoch = 0


class EarlyStoppingByLossVal(Callback):
    """Keras callback that stops training once the loss falls below a threshold.

    At the end of every epoch the ``"loss"`` entry of the epoch logs is
    compared with ``value``; when it is strictly smaller, training is stopped
    and the module-level ``lastEpoch`` is set to the 1-based epoch number.

    Args:
        monitor: Stored on the instance but not consulted; the callback
            always reads the ``"loss"`` entry of the logs.
        value: Loss threshold below which training is stopped.
        verbose: Stored on the instance; not used by the callback itself.
    """

    def __init__(self, monitor='val_loss', value=0.008, verbose=0):
        # Name this class (not Callback) in super() so that
        # Callback.__init__ actually runs.
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Request a training stop when the epoch loss is below ``self.value``."""
        global lastEpoch
        # `logs` may be omitted; a None default avoids a shared mutable dict.
        current = (logs or {}).get("loss")
        if current is not None and current < self.value:
            self.model.stop_training = True
            # `epoch` is 0-based; record the count of completed epochs.
            lastEpoch = epoch + 1


# XOR truth table: inputs and expected outputs.
x = np.array([
    [0, 0], [0, 1],
    [1, 0], [1, 1]
])
y = np.array([
    [0], [1],
    [1], [0]
])

# 2 inputs -> 8 tanh units -> 1 tanh output, all without bias terms.
# Without biases the input [0, 0] always produces exactly 0 (tanh(0) == 0),
# which coincides with its XOR target.
model = Sequential()
model.add(Dense(8,
                input_dim=2,
                use_bias=False,
                kernel_initializer=VarianceScaling()))
model.add(Activation('tanh'))
model.add(Dense(1,
                use_bias=False,
                kernel_initializer=VarianceScaling()))
model.add(Activation('tanh'))
model.compile(loss="mean_squared_error",
              optimizer=SGD(lr=0.6,
                            momentum=0.6))

# batch_size=4 puts the whole truth table into a single update per epoch.
# 10000 is only an upper bound: the callback stops training once the loss
# drops below its threshold.
model.fit(x, y,
          verbose=1,
          batch_size=4,
          epochs=10000,
          callbacks=[
              EarlyStoppingByLossVal()
          ])

# The tanh output is not a probability, so use predict() rather than
# predict_proba(); both return the same raw network outputs.
print(model.predict(x))
print("Last epoch: " + str(lastEpoch))

Try this code. The best result I was able to get from it is 22 epochs with a batch size of 4. So only 86 network evaluations!

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import Callback
from keras.initializers import VarianceScaling 
import numpy as np 


# Number of epochs that had been run when EarlyStoppingByLossVal stopped
# training; stays 0 if the callback never triggers.
lastEpoch = 0


class EarlyStoppingByLossVal(Callback):
    """Keras callback that stops training once the loss falls below a threshold.

    At the end of every epoch the ``"loss"`` entry of the epoch logs is
    compared with ``value``; when it is strictly smaller, training is stopped
    and the module-level ``lastEpoch`` is set to the 1-based epoch number.

    Args:
        monitor: Stored on the instance but not consulted; the callback
            always reads the ``"loss"`` entry of the logs.
        value: Loss threshold below which training is stopped.
        verbose: Stored on the instance; not used by the callback itself.
    """

    def __init__(self, monitor='val_loss', value=0.008, verbose=0):
        # Name this class (not Callback) in super() so that
        # Callback.__init__ actually runs.
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Request a training stop when the epoch loss is below ``self.value``."""
        global lastEpoch
        # `logs` may be omitted; a None default avoids a shared mutable dict.
        current = (logs or {}).get("loss")
        if current is not None and current < self.value:
            self.model.stop_training = True
            # `epoch` is 0-based; record the count of completed epochs.
            lastEpoch = epoch + 1


# XOR truth table: inputs and expected outputs.
x = np.array([
    [0, 0], [0, 1],
    [1, 0], [1, 1]
])
y = np.array([
    [0], [1],
    [1], [0]
])

# 2 inputs -> 8 tanh units -> 1 tanh output, all without bias terms.
# Without biases the input [0, 0] always produces exactly 0 (tanh(0) == 0),
# which coincides with its XOR target.
model = Sequential()
model.add(Dense(8,
                input_dim=2,
                use_bias=False,
                kernel_initializer=VarianceScaling()))
model.add(Activation('tanh'))
model.add(Dense(1,
                use_bias=False,
                kernel_initializer=VarianceScaling()))
model.add(Activation('tanh'))
model.compile(loss="mean_squared_error",
              optimizer=SGD(lr=0.6,
                            momentum=0.6))

# batch_size=4 puts the whole truth table into a single update per epoch.
# 10000 is only an upper bound: the callback stops training once the loss
# drops below its threshold.
model.fit(x, y,
          verbose=1,
          batch_size=4,
          epochs=10000,
          callbacks=[
              EarlyStoppingByLossVal()
          ])

# The tanh output is not a probability, so use predict() rather than
# predict_proba(); both return the same raw network outputs.
print(model.predict(x))
print("Last epoch: " + str(lastEpoch))
@baj12

This comment has been minimized.

Show comment
Hide comment
@baj12

baj12 Oct 14, 2017

how would i solve this xor problem:

# Continuous XOR data set: n uniform random points in each of the four
# quadrants around the origin. Points whose coordinates share a sign get
# label 1, points with mixed signs get label 0.
n = 300

# np.random.rand draws from [0, 1); the sign patterns move each sample into
# its own quadrant.
x1 = -np.random.rand(n, 2)            # x < 0, y < 0
x2 = np.random.rand(n, 2) * [1, -1]   # x > 0, y < 0
x3 = np.random.rand(n, 2) * [-1, 1]   # x < 0, y > 0
x4 = np.random.rand(n, 2)             # x > 0, y > 0

# Stack the quadrants and rescale the coordinates from (-1, 1) to (0, 1).
x = (np.concatenate((x1, x2, x3, x4)) + 1) / 2

# Labels in the same order as the stacked samples.
y1, y4 = np.ones(n), np.ones(n)
y2, y3 = np.zeros(n), np.zeros(n)
y = np.concatenate((y1, y2, y3, y4))

# Show two sample rows (indices 1 and 2) of every quadrant.
for _quadrant in (x1, x2, x3, x4):
    print(_quadrant[[1, 2], :])

import matplotlib.pyplot as plt

# Scatter the unscaled points: red for label 1, blue for label 0.
for _quadrant, _marker in ((x1, 'ro'), (x2, 'bo'), (x3, 'bo'), (x4, 'ro')):
    plt.plot(_quadrant[:, 0], _quadrant[:, 1], _marker)

plt.show()

baj12 commented Oct 14, 2017

how would i solve this xor problem:

# Continuous XOR data set: n uniform random points in each of the four
# quadrants around the origin. Points whose coordinates share a sign get
# label 1, points with mixed signs get label 0.
n = 300

# np.random.rand draws from [0, 1); the sign patterns move each sample into
# its own quadrant.
x1 = -np.random.rand(n, 2)            # x < 0, y < 0
x2 = np.random.rand(n, 2) * [1, -1]   # x > 0, y < 0
x3 = np.random.rand(n, 2) * [-1, 1]   # x < 0, y > 0
x4 = np.random.rand(n, 2)             # x > 0, y > 0

# Stack the quadrants and rescale the coordinates from (-1, 1) to (0, 1).
x = (np.concatenate((x1, x2, x3, x4)) + 1) / 2

# Labels in the same order as the stacked samples.
y1, y4 = np.ones(n), np.ones(n)
y2, y3 = np.zeros(n), np.zeros(n)
y = np.concatenate((y1, y2, y3, y4))

# Show two sample rows (indices 1 and 2) of every quadrant.
for _quadrant in (x1, x2, x3, x4):
    print(_quadrant[[1, 2], :])

import matplotlib.pyplot as plt

# Scatter the unscaled points: red for label 1, blue for label 0.
for _quadrant, _marker in ((x1, 'ro'), (x2, 'bo'), (x3, 'bo'), (x4, 'ro')):
    plt.plot(_quadrant[:, 0], _quadrant[:, 1], _marker)

plt.show()

@katejarne

This comment has been minimized.

Show comment
Hide comment
@katejarne

katejarne Feb 15, 2018

Do you know how to build an XOR model (or a model for another binary task) using simple recurrent layers? Does it make sense to do that? Could you comment on that?

Do you know how to build an XOR model (or a model for another binary task) using simple recurrent layers? Does it make sense to do that? Could you comment on that?

@akol67

This comment has been minimized.

Show comment
Hide comment
@akol67

akol67 Mar 7, 2018

baj12 left us a good example. While working on it (I added some label noise), I could not get loss values below 0.22...
Any suggestions for getting better loss values?
PS: if you want to switch the output activation from softmax to sigmoid, do not one-hot encode y (skip the to_categorical call).

# Noisy continuous XOR data set: n clean points per quadrant plus `fat`
# deliberately mislabelled points per quadrant.
n = 200
ruido = 3  # noise level, in percent of n
# Integer division: `fat` is used as an array size, and a float size is
# rejected by np.random.rand / np.zeros / np.ones.
fat = n * ruido // 100
print("nivel de ruido",ruido,"%")

# Quadrant x < 0, y < 0: clean points are class 1 (red), noise is class 0 (blue).
x1 = np.random.rand(n, 2) * (-1)
plt.plot(x1[:, 0], x1[:, 1], 'ro')
x11 = np.random.rand(fat, 2) * (-1)
plt.plot(x11[:, 0], x11[:, 1], 'bo')
# Quadrant x > 0, y < 0: clean points are class 0 (blue), noise is class 1 (red).
x2 = np.random.rand(n, 2)
x2[:, 1] *= (-1)
plt.plot(x2[:, 0], x2[:, 1], 'bo')
x22 = np.random.rand(fat, 2)
x22[:, 1] *= (-1)
plt.plot(x22[:, 0], x22[:, 1], 'ro')
# Quadrant x < 0, y > 0: clean points are class 0 (blue), noise is class 1 (red).
x3 = np.random.rand(n, 2)
x3[:, 0] *= (-1)
plt.plot(x3[:, 0], x3[:, 1], 'bo')
x33 = np.random.rand(fat, 2)
x33[:, 0] *= (-1)
plt.plot(x33[:, 0], x33[:, 1], 'ro')
# Quadrant x > 0, y > 0: clean points are class 1 (red), noise is class 0 (blue).
x4 = np.random.rand(n, 2)
plt.plot(x4[:, 0], x4[:, 1], 'ro')
x44 = np.random.rand(fat, 2)
plt.plot(x44[:, 0], x44[:, 1], 'bo')

# Stack all samples and rescale the coordinates from (-1, 1) to (0, 1).
X = np.concatenate((x1, x11, x2, x22, x3, x33, x4, x44))
X = (X + 1) / 2

# Labels, concatenated in the same order as the samples in X.
y1 = np.ones(n)
y11 = np.zeros(fat)
y4 = np.ones(n)
y44 = np.zeros(fat)
y2 = np.zeros(n)
y22 = np.ones(fat)
y3 = np.zeros(n)
y33 = np.ones(fat)
y = np.concatenate((y1, y11, y2, y22, y3, y33, y4, y44))

If you want to switch the output activation from softmax to sigmoid, do not one-hot encode y: train on y directly instead of on the to_categorical result.

# to_categorical is imported here because the snippet's import list does not
# provide it (the original referenced an unimported `np_utils`).
from keras.utils import to_categorical

# One-hot encode the 0/1 labels to match the 2-unit softmax output layer.
# For a sigmoid variant, use a single output unit and fit on `y` directly
# instead of on the one-hot `y2`.
y2 = to_categorical(y)

# Network: inputs -> 12 tanh units -> 2-way softmax.
model = Sequential()
model.add(Dense(12, input_dim=X.shape[1], activation='tanh', kernel_initializer=VarianceScaling()))
# `kernel_initializer` is the Keras 2 name of the former `init` argument.
model.add(Dense(2, kernel_initializer='random_uniform', activation='softmax', name="output"))
sgd = SGD(lr=0.01)
model.compile(loss='binary_crossentropy', optimizer=sgd)
model.summary()
# `epochs` is the Keras 2 name of the former `nb_epoch` argument; the
# callback may stop training before all 2000 epochs have run.
model.fit(X, y2, batch_size=2, shuffle=True, epochs=2000, verbose=1, callbacks=[EarlyStoppingByLossVal()])
# Class index = position of the largest softmax output. This is what
# predict_classes() computed before it was removed from Keras.
plot_decision_boundary(lambda pts: np.argmax(model.predict(pts), axis=-1))
print("Last epoch: " + str(lastEpoch))

akol67 commented Mar 7, 2018

baj12 left us a good example. While working on it (I added some label noise), I could not get loss values below 0.22...
Any suggestions for getting better loss values?
PS: if you want to switch the output activation from softmax to sigmoid, do not one-hot encode y (skip the to_categorical call).

# Noisy continuous XOR data set: n clean points per quadrant plus `fat`
# deliberately mislabelled points per quadrant.
n = 200
ruido = 3  # noise level, in percent of n
# Integer division: `fat` is used as an array size, and a float size is
# rejected by np.random.rand / np.zeros / np.ones.
fat = n * ruido // 100
print("nivel de ruido",ruido,"%")

# Quadrant x < 0, y < 0: clean points are class 1 (red), noise is class 0 (blue).
x1 = np.random.rand(n, 2) * (-1)
plt.plot(x1[:, 0], x1[:, 1], 'ro')
x11 = np.random.rand(fat, 2) * (-1)
plt.plot(x11[:, 0], x11[:, 1], 'bo')
# Quadrant x > 0, y < 0: clean points are class 0 (blue), noise is class 1 (red).
x2 = np.random.rand(n, 2)
x2[:, 1] *= (-1)
plt.plot(x2[:, 0], x2[:, 1], 'bo')
x22 = np.random.rand(fat, 2)
x22[:, 1] *= (-1)
plt.plot(x22[:, 0], x22[:, 1], 'ro')
# Quadrant x < 0, y > 0: clean points are class 0 (blue), noise is class 1 (red).
x3 = np.random.rand(n, 2)
x3[:, 0] *= (-1)
plt.plot(x3[:, 0], x3[:, 1], 'bo')
x33 = np.random.rand(fat, 2)
x33[:, 0] *= (-1)
plt.plot(x33[:, 0], x33[:, 1], 'ro')
# Quadrant x > 0, y > 0: clean points are class 1 (red), noise is class 0 (blue).
x4 = np.random.rand(n, 2)
plt.plot(x4[:, 0], x4[:, 1], 'ro')
x44 = np.random.rand(fat, 2)
plt.plot(x44[:, 0], x44[:, 1], 'bo')

# Stack all samples and rescale the coordinates from (-1, 1) to (0, 1).
X = np.concatenate((x1, x11, x2, x22, x3, x33, x4, x44))
X = (X + 1) / 2

# Labels, concatenated in the same order as the samples in X.
y1 = np.ones(n)
y11 = np.zeros(fat)
y4 = np.ones(n)
y44 = np.zeros(fat)
y2 = np.zeros(n)
y22 = np.ones(fat)
y3 = np.zeros(n)
y33 = np.ones(fat)
y = np.concatenate((y1, y11, y2, y22, y3, y33, y4, y44))

If you want to switch the output activation from softmax to sigmoid, do not one-hot encode y: train on y directly instead of on the to_categorical result.

# to_categorical is imported here because the snippet's import list does not
# provide it (the original referenced an unimported `np_utils`).
from keras.utils import to_categorical

# One-hot encode the 0/1 labels to match the 2-unit softmax output layer.
# For a sigmoid variant, use a single output unit and fit on `y` directly
# instead of on the one-hot `y2`.
y2 = to_categorical(y)

# Network: inputs -> 12 tanh units -> 2-way softmax.
model = Sequential()
model.add(Dense(12, input_dim=X.shape[1], activation='tanh', kernel_initializer=VarianceScaling()))
# `kernel_initializer` is the Keras 2 name of the former `init` argument.
model.add(Dense(2, kernel_initializer='random_uniform', activation='softmax', name="output"))
sgd = SGD(lr=0.01)
model.compile(loss='binary_crossentropy', optimizer=sgd)
model.summary()
# `epochs` is the Keras 2 name of the former `nb_epoch` argument; the
# callback may stop training before all 2000 epochs have run.
model.fit(X, y2, batch_size=2, shuffle=True, epochs=2000, verbose=1, callbacks=[EarlyStoppingByLossVal()])
# Class index = position of the largest softmax output. This is what
# predict_classes() computed before it was removed from Keras.
plot_decision_boundary(lambda pts: np.argmax(model.predict(pts), axis=-1))
print("Last epoch: " + str(lastEpoch))

@akol67

This comment has been minimized.

Show comment
Hide comment
@akol67

akol67 Mar 7, 2018

sorry, forgot to include the libraries I used to run the code above

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import Callback
from keras.initializers import VarianceScaling
import numpy as np
import matplotlib.pyplot as plt

# Number of epochs that had been run when EarlyStoppingByLossVal stopped
# training; stays 0 if the callback never triggers.
lastEpoch = 0


class EarlyStoppingByLossVal(Callback):
    """Keras callback that stops training once the loss falls below a threshold.

    At the end of every epoch the ``"loss"`` entry of the epoch logs is
    compared with ``value``; when it is strictly smaller, training is stopped
    and the module-level ``lastEpoch`` is set to the 1-based epoch number.

    Args:
        monitor: Stored on the instance but not consulted; the callback
            always reads the ``"loss"`` entry of the logs.
        value: Loss threshold below which training is stopped.
        verbose: Stored on the instance; not used by the callback itself.
    """

    # The constructor must be named __init__ (the double underscores were
    # lost in the pasted text); otherwise it is never invoked on construction.
    def __init__(self, monitor='val_loss', value=0.02, verbose=0):
        # Name this class (not Callback) in super() so that
        # Callback.__init__ actually runs.
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Request a training stop when the epoch loss is below ``self.value``."""
        global lastEpoch
        # `logs` may be omitted; a None default avoids a shared mutable dict.
        current = (logs or {}).get("loss")
        if current is not None and current < self.value:
            self.model.stop_training = True
            # `epoch` is 0-based; record the count of completed epochs.
            lastEpoch = epoch + 1

def plot_decision_boundary(pred_func):
    """Draw the decision regions of ``pred_func`` with the training points on top.

    Reads the module-level arrays ``X`` (samples; columns 0 and 1 are used as
    the plot axes) and ``y`` (values used to colour the points).

    Args:
        pred_func: Callable that receives an array with one (x, y) grid
            point per row and returns one prediction per row.
    """
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    # Back to the grid's 2-D shape so the predictions can be contoured
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

akol67 commented Mar 7, 2018

sorry, forgot to include the libraries I used to run the code above

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.callbacks import Callback
from keras.initializers import VarianceScaling
import numpy as np
import matplotlib.pyplot as plt

# Number of epochs that had been run when EarlyStoppingByLossVal stopped
# training; stays 0 if the callback never triggers.
lastEpoch = 0


class EarlyStoppingByLossVal(Callback):
    """Keras callback that stops training once the loss falls below a threshold.

    At the end of every epoch the ``"loss"`` entry of the epoch logs is
    compared with ``value``; when it is strictly smaller, training is stopped
    and the module-level ``lastEpoch`` is set to the 1-based epoch number.

    Args:
        monitor: Stored on the instance but not consulted; the callback
            always reads the ``"loss"`` entry of the logs.
        value: Loss threshold below which training is stopped.
        verbose: Stored on the instance; not used by the callback itself.
    """

    # The constructor must be named __init__ (the double underscores were
    # lost in the pasted text); otherwise it is never invoked on construction.
    def __init__(self, monitor='val_loss', value=0.02, verbose=0):
        # Name this class (not Callback) in super() so that
        # Callback.__init__ actually runs.
        super(EarlyStoppingByLossVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Request a training stop when the epoch loss is below ``self.value``."""
        global lastEpoch
        # `logs` may be omitted; a None default avoids a shared mutable dict.
        current = (logs or {}).get("loss")
        if current is not None and current < self.value:
            self.model.stop_training = True
            # `epoch` is 0-based; record the count of completed epochs.
            lastEpoch = epoch + 1

def plot_decision_boundary(pred_func):
    """Draw the decision regions of ``pred_func`` with the training points on top.

    Reads the module-level arrays ``X`` (samples; columns 0 and 1 are used as
    the plot axes) and ``y`` (values used to colour the points).

    Args:
        pred_func: Callable that receives an array with one (x, y) grid
            point per row and returns one prediction per row.
    """
    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    # Back to the grid's 2-D shape so the predictions can be contoured
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment