Skip to content

Instantly share code, notes, and snippets.

@MaxHalford
Created May 18, 2017 15:35
Show Gist options
  • Save MaxHalford/9bfaa8daf8b4bc17a7fb7ba58c880675 to your computer and use it in GitHub Desktop.
Save MaxHalford/9bfaa8daf8b4bc17a7fb7ba58c880675 to your computer and use it in GitHub Desktop.
Keras fit/predict scikit-learn pipeline
import os
from keras import backend as K
from keras import callbacks
from keras import layers
from keras import models
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from sklearn import pipeline
from sklearn import preprocessing
from sklearn.externals import joblib
# Read the training features, and the `is_listened` column of the labels file
features_path = 'data/X_train.csv'
labels_path = 'data/y_train.csv'
X_train = pd.read_csv(features_path)
y_train = pd.read_csv(labels_path)['is_listened']

# Create a TensorFlow session and register it with the Keras backend
sess = tf.Session()
K.set_session(sess)
def model():
    """Build and compile the feed-forward network used as the classifier.

    The input width is read from the module-level ``X_train``. The network
    stacks two 64-unit ReLU layers, each followed by a ``Dropout(0.5)``
    layer, and ends in a single sigmoid unit. It is compiled with binary
    cross-entropy loss, the ``rmsprop`` optimizer and accuracy as a metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features = X_train.shape[1]

    net = models.Sequential()
    net.add(layers.Dense(64, input_dim=n_features, activation='relu'))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(64, activation='relu'))
    net.add(layers.Dropout(0.5))
    net.add(layers.Dense(1, activation='sigmoid'))

    net.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return net
# Early stopping keyed on the validation loss, with a patience of 1
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=1,
    verbose=0,
    mode='auto'
)

# Standardise the features, then feed them to the scikit-learn-wrapped network.
# NOTE(review): `nb_epoch` is the Keras 1 name for the epoch count; Keras 2
# calls it `epochs` -- confirm which one the installed version honours.
scaler = preprocessing.StandardScaler()
classifier = KerasClassifier(
    build_fn=model,
    nb_epoch=10,
    batch_size=128,
    validation_split=0.2,
    callbacks=[early_stopping]
)
pipe = pipeline.Pipeline([('rescale', scaler), ('nn', classifier)])
pipe.fit(X_train.values, y_train.values)

# Persist the two parts separately: detach the final 'nn' step so that only
# the remaining pipeline is dumped with joblib, and save the underlying Keras
# model to its own file next to this script.
directory = os.path.dirname(os.path.realpath(__file__))
_, model_step = pipe.steps.pop(-1)
pipeline_path = os.path.join(directory, 'pipeline.pkl')
model_path = os.path.join(directory, 'model.h5')
joblib.dump(pipe, pipeline_path)
models.save_model(model_step.model, model_path)
import os
from keras import models
import pandas as pd
from sklearn.externals import joblib
# Read the test features and the file that carries the sample ids
X_test = pd.read_csv('data/X_test.csv')
y_test = pd.read_csv('data/y_test.csv')

# Reassemble the pipeline: load the joblib-dumped steps and the saved Keras
# model from this script's directory, then put the model back as the 'nn' step.
directory = os.path.dirname(os.path.realpath(__file__))
pipeline_path = os.path.join(directory, 'pipeline.pkl')
model_path = os.path.join(directory, 'model.h5')
pipe = joblib.load(pipeline_path)
model = models.load_model(model_path)
pipe.steps.append(('nn', model))

# Keep the first column of the predicted probabilities
probabilities = pipe.predict_proba(X_test)
pred = probabilities[:, 0]

# Write one row per sample, ordered by sample id
sample_ids = y_test['sample_id'].astype(int)
submission = pd.DataFrame(data={'sample_id': sample_ids, 'is_listened': pred})
submission = submission.sort_values('sample_id')
submission_path = os.path.join(directory, 'submission_keras.csv')
submission.to_csv(submission_path, index=False)
@allenjeep
Copy link

Hi, may I ask: if I have, say, 10 classes, how can I fit np_utils.to_categorical into the pipeline?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment