Skip to content

Instantly share code, notes, and snippets.

@izharikov
Last active January 19, 2018 14:36
Show Gist options
  • Save izharikov/46cdf1b13ede53cf979d325bfafcdf4f to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from sklearn.metrics import log_loss
def test_with_best_result(path_to_current):
path_to_best = "C:\\Users\\Igor\\Downloads\\submission54.csv"
best = pd.read_csv(path_to_best)
current = pd.read_csv(path_to_current)
bst_is_iceberg = np.array(best['is_iceberg'].tolist())
cur_is_iceberg = np.array(current['is_iceberg'].tolist())
err = np.sqrt(np.mean((bst_is_iceberg - cur_is_iceberg) ** 2))
print("Error", err)
def cmp_from_kaggle(path_to_current,
                    path_to_best="C:\\Users\\Igor\\Downloads\\submission54.csv",
                    known_best_score=0.1427):
    """Estimate a Kaggle log-loss score for a local submission.

    Uses the reference submission's own thresholded predictions as
    pseudo-labels, then offsets both log-losses by the reference
    submission's known leaderboard score so the two printed numbers are
    comparable on the leaderboard scale.

    NOTE(review): this is only a heuristic — the pseudo-labels are not
    the true test labels, so the estimate is approximate.

    :param path_to_current: path to the submission being evaluated.
    :param path_to_best: path to the reference submission CSV.
    :param known_best_score: the reference submission's public
        leaderboard log-loss (default 0.1427).
    :return: the estimated leaderboard score for *path_to_current*.
    """
    out5 = pd.read_csv(path_to_best, index_col=0)
    # Threshold the reference predictions at 0.5 to build pseudo-labels.
    labels = (out5 > 0.5).astype(int)
    out5err = log_loss(labels, out5)
    print('out5 Error:', known_best_score + out5err)
    our_out = pd.read_csv(path_to_current, index_col=0)
    our_err = log_loss(labels, our_out)
    estimate = known_best_score + our_err
    print('my Error:', estimate)
    return estimate
if __name__ == "__main__":
    # Submission produced locally by the prediction script.
    path_to_current = "result.csv"
    test_with_best_result(path_to_current)
    cmp_from_kaggle(path_to_current)
from kaggle.train import getModel, normalize_input
import pandas as pd
import numpy as np
if __name__ == "__main__":
    # Statoil/C-CORE iceberg test set: each row has two flattened
    # 75x75 radar bands ('band_1', 'band_2') and an 'id'.
    path_to_file = "C:\\Users\\Igor\\Downloads\\test.json\\data\\processed\\test.json"
    test = pd.read_json(path_to_file)

    # Reshape each flattened band back into a 75x75 float32 image.
    X_band_test_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75)
                              for band in test["band_1"]])
    X_band_test_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75)
                              for band in test["band_2"]])
    # Stack band_1, band_2 and their mean as the 3 input channels.
    X_test = np.concatenate([X_band_test_1[:, :, :, np.newaxis],
                             X_band_test_2[:, :, :, np.newaxis],
                             ((X_band_test_1 + X_band_test_2) / 2)[:, :, :, np.newaxis]],
                            axis=-1)
    X_test = normalize_input(X_test)

    # Rebuild the architecture and restore the best training weights.
    gmodel = getModel()
    file_path = ".model_weights.hdf5"
    gmodel.load_weights(filepath=file_path)
    # NOTE(review): predict_proba is removed in newer Keras; with a
    # sigmoid output, predict() returns the same values — confirm the
    # installed Keras version before upgrading.
    predicted_test = gmodel.predict_proba(X_test)

    # Flatten the (n, 1) prediction column into shape (n,) for the CSV.
    submission = pd.DataFrame()
    submission['id'] = test['id']
    submission['is_iceberg'] = predicted_test.reshape((predicted_test.shape[0]))
    submission.to_csv('result.csv', index=False)
from matplotlib import pyplot
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import initializers
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import math
import tensorflow as tf
def getModel():
    """Build and compile the CNN used for iceberg/ship classification.

    Input: (75, 75, 3) images. Output: a single sigmoid probability.
    Compiled with binary cross-entropy and Adam (lr=0.001).
    """
    model = Sequential()

    # Convolutional stack: (filters, pool_size or None to skip pooling).
    conv_stack = [
        (128, (3, 3)),
        (128, (2, 2)),
        (128, (2, 2)),
        (128, None),
        (64, None),
    ]
    for i, (filters, pool) in enumerate(conv_stack):
        if i == 0:
            # First layer fixes the input shape.
            model.add(Conv2D(filters, kernel_size=(2, 2), activation='relu',
                             input_shape=(75, 75, 3)))
        else:
            model.add(Conv2D(filters, kernel_size=(2, 2), activation='relu'))
        if pool is not None:
            model.add(MaxPooling2D(pool_size=pool, strides=(2, 2)))
        model.add(Dropout(0.3))

    # Dense head: two hidden layers, heavier dropout.
    model.add(Flatten())
    for units in (512, 256):
        model.add(Dense(units))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))

    # Single-unit sigmoid output for binary classification.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    return model
def get_callbacks(filepath, patience=2):
    """Return the training callbacks: early stop, checkpoint, LR schedule.

    :param filepath: where ModelCheckpoint saves the best weights.
    :param patience: epochs without val_loss improvement before stopping.
    """
    stopping = EarlyStopping('val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7,
                                    verbose=1, epsilon=1e-4, mode='min')
    return [stopping, checkpoint, lr_schedule]
def normalize_input(x_input):
    """Identity placeholder for input normalization.

    A TensorFlow l2-normalization over the channel axis was tried and
    intentionally disabled; inputs currently pass through unchanged.
    """
    # Disabled experiment, kept for reference:
    # sess = tf.Session()
    # x = tf.nn.l2_normalize(x_input, 3, epsilon=1e-12, name=None)
    # return sess.run(x)
    return x_input
if __name__ == "__main__":
    file_path = ".model_weights.hdf5"
    callbacks = get_callbacks(filepath=file_path, patience=8)

    # Load the training set and build (75, 75, 3) inputs from the two
    # flattened radar bands plus their per-pixel mean.
    path_to_file = "C:\\Users\\Igor\\Downloads\\train.json\\data\\processed\\train.json"
    train = pd.read_json(path_to_file)
    X_band_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75)
                         for band in train["band_1"]])
    X_band_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75)
                         for band in train["band_2"]])
    X_train = np.concatenate([X_band_1[:, :, :, np.newaxis],
                              X_band_2[:, :, :, np.newaxis],
                              ((X_band_1 + X_band_2) / 2)[:, :, :, np.newaxis]],
                             axis=-1)
    target_train = train['is_iceberg']
    X_train = normalize_input(X_train)

    # 75/25 train/validation split; fixed seed for reproducibility.
    X_train_cv, X_valid, y_train_cv, y_valid = train_test_split(
        X_train, target_train, random_state=1, train_size=0.75)

    gmodel = getModel()
    gmodel.fit(X_train_cv, y_train_cv,
               batch_size=32,
               epochs=50,
               verbose=1,
               validation_data=(X_valid, y_valid),
               callbacks=callbacks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment