Last active
January 19, 2018 14:36
-
-
Save izharikov/46cdf1b13ede53cf979d325bfafcdf4f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import log_loss | |
def test_with_best_result(path_to_current): | |
path_to_best = "C:\\Users\\Igor\\Downloads\\submission54.csv" | |
best = pd.read_csv(path_to_best) | |
current = pd.read_csv(path_to_current) | |
bst_is_iceberg = np.array(best['is_iceberg'].tolist()) | |
cur_is_iceberg = np.array(current['is_iceberg'].tolist()) | |
err = np.sqrt(np.mean((bst_is_iceberg - cur_is_iceberg) ** 2)) | |
print("Error", err) | |
def cmp_from_kaggle(path_to_current,
                    path_to_best="C:\\Users\\Igor\\Downloads\\submission54.csv",
                    kaggle_offset=0.1427):
    """Estimate Kaggle leaderboard scores for two submissions.

    Uses the best submission's thresholded probabilities as pseudo
    ground-truth labels, scores both submissions with log-loss against
    those labels, and shifts each by ``kaggle_offset`` (the gap observed
    between this local metric and the actual leaderboard score of the
    reference submission).

    Parameters
    ----------
    path_to_current : str or file-like
        CSV of the submission to evaluate (index column first).
    path_to_best : str or file-like, optional
        CSV of the reference submission. Defaults to the best local one.
    kaggle_offset : float, optional
        Calibration constant added to both local log-loss values.

    Returns
    -------
    tuple of float
        ``(best_estimate, current_estimate)`` — estimated leaderboard
        scores for the reference and the current submission.
    """
    out5 = pd.read_csv(path_to_best, index_col=0)
    # Pseudo labels: threshold the best submission's probabilities at 0.5.
    labels = (out5 > 0.5).astype(int)
    out5err = log_loss(labels, out5)
    best_estimate = kaggle_offset + out5err
    print('out5 Error:', best_estimate)
    our_out = pd.read_csv(path_to_current, index_col=0)
    our_err = log_loss(labels, our_out)
    current_estimate = kaggle_offset + our_err
    print('my Error:', current_estimate)
    return best_estimate, current_estimate
if __name__ == "__main__":
    # Score the locally generated submission against the reference one
    # using both the RMSE check and the pseudo-Kaggle log-loss estimate.
    submission_path = "result.csv"
    test_with_best_result(submission_path)
    cmp_from_kaggle(submission_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from kaggle.train import getModel, normalize_input | |
import pandas as pd | |
import numpy as np | |
if __name__ == "__main__":
    # Inference script: load the Kaggle iceberg test set, assemble the
    # 75x75x3 inputs, restore the trained weights and write 'result.csv'.
    path_to_file = "C:\\Users\\Igor\\Downloads\\test.json\\data\\processed\\test.json"
    test = pd.read_json(path_to_file)
    # Each radar band is a flat list of 5625 floats; reshape to 75x75 images.
    X_band_test_1=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_1"]])
    X_band_test_2=np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in test["band_2"]])
    # Stack band_1, band_2 and their element-wise mean as three channels,
    # giving an array of shape (N, 75, 75, 3).
    X_test = np.concatenate([X_band_test_1[:, :, :, np.newaxis]
                             , X_band_test_2[:, :, :, np.newaxis]
                             , ((X_band_test_1+X_band_test_2)/2)[:, :, :, np.newaxis]], axis=-1)
    # normalize_input is currently a no-op; kept so test preprocessing
    # stays symmetric with training (see kaggle.train).
    X_test = normalize_input(X_test)
    gmodel = getModel()
    # Weights file written by the training script's ModelCheckpoint callback.
    file_path = ".model_weights.hdf5"
    gmodel.load_weights(filepath=file_path)
    # predict_proba yields shape (N, 1); flattened below for the CSV column.
    predicted_test=gmodel.predict_proba(X_test)
    submission = pd.DataFrame()
    submission['id']=test['id']
    submission['is_iceberg']=predicted_test.reshape((predicted_test.shape[0]))
    submission.to_csv('result.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from matplotlib import pyplot | |
from keras.preprocessing.image import ImageDataGenerator | |
from keras.models import Sequential | |
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation | |
from keras.layers import GlobalMaxPooling2D | |
from keras.layers.normalization import BatchNormalization | |
from keras.layers.merge import Concatenate | |
from keras.models import Model | |
from keras import initializers | |
from keras.optimizers import Adam | |
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau | |
from sklearn.model_selection import train_test_split | |
import pandas as pd | |
import numpy as np | |
import math | |
import tensorflow as tf | |
def getModel():
    """Build and compile the CNN for the iceberg/ship classifier.

    Input: (75, 75, 3) images — band_1, band_2 and their mean.
    Output: a single sigmoid unit giving P(is_iceberg).
    Compiled with binary cross-entropy and Adam (lr=0.001).
    """
    layers = [
        # Conv block 1 — only the first three blocks downsample via pooling.
        Conv2D(128, kernel_size=(2, 2), activation='relu', input_shape=(75, 75, 3)),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        Dropout(0.3),
        # Conv block 2
        Conv2D(128, kernel_size=(2, 2), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.3),
        # Conv block 3
        Conv2D(128, kernel_size=(2, 2), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.3),
        # Conv block 4 (pooling disabled)
        Conv2D(128, kernel_size=(2, 2), activation='relu'),
        Dropout(0.3),
        # Conv block 5 (pooling disabled)
        Conv2D(64, kernel_size=(2, 2), activation='relu'),
        Dropout(0.3),
        # Dense classifier head
        Flatten(),
        Dense(512),
        Activation('relu'),
        Dropout(0.5),
        Dense(256),
        Activation('relu'),
        Dropout(0.5),
        # Sigmoid output
        Dense(1),
        Activation('sigmoid'),
    ]
    model = Sequential(layers)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    return model
def get_callbacks(filepath, patience=2):
    """Return training callbacks: early stop, checkpoint, LR reduction.

    Parameters
    ----------
    filepath : str
        Where ModelCheckpoint writes the best weights (by val_loss).
    patience : int, optional
        Epochs without val_loss improvement before stopping early.
    """
    early_stop = EarlyStopping('val_loss', patience=patience, mode="min")
    checkpoint = ModelCheckpoint(filepath, save_best_only=True)
    # Halve-by-10 the learning rate after 7 stagnant epochs on val_loss.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7,
                                    verbose=1, epsilon=1e-4, mode='min')
    return [early_stop, checkpoint, lr_schedule]
def normalize_input(x_input):
    """Return *x_input* unchanged.

    Kept as a hook for input normalization so the train and inference
    pipelines share one preprocessing entry point. The disabled variant
    below applied a TensorFlow L2 normalization over the channel axis.
    """
    # sess = tf.Session()
    # x = tf.nn.l2_normalize(x_input, 3, epsilon=1e-12, name=None)
    # return sess.run(x)
    return x_input
if __name__ == "__main__":
    # Training script: fit the CNN on the Kaggle iceberg train set and
    # checkpoint the best weights (by validation loss) to file_path.
    file_path = ".model_weights.hdf5"
    callbacks = get_callbacks(filepath=file_path, patience=8)
    path_to_file = "C:\\Users\\Igor\\Downloads\\train.json\\data\\processed\\train.json"
    train = pd.read_json(path_to_file)
    # Each radar band is a flat list of 5625 floats; reshape to 75x75 images.
    X_band_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_1"]])
    X_band_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in train["band_2"]])
    # Stack band_1, band_2 and their element-wise mean as three channels,
    # giving an array of shape (N, 75, 75, 3).
    X_train = np.concatenate([X_band_1[:, :, :, np.newaxis], X_band_2[:, :, :, np.newaxis],
                              ((X_band_1 + X_band_2) / 2)[:, :, :, np.newaxis]], axis=-1)
    target_train = train['is_iceberg']
    # normalize_input is currently a no-op; kept so train preprocessing
    # stays symmetric with inference.
    X_train = normalize_input(X_train)
    # Fixed random_state gives a reproducible 75/25 train/validation split.
    X_train_cv, X_valid, y_train_cv, y_valid = train_test_split(X_train, target_train, random_state=1, train_size=0.75)
    gmodel = getModel()
    gmodel.fit(X_train_cv, y_train_cv,
               batch_size=32,
               epochs=50,
               verbose=1,
               validation_data=(X_valid, y_valid),
               callbacks=callbacks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment