Created
July 1, 2015 15:51
-
-
Save alfard/cc4ee6f704f4a32d4afe to your computer and use it in GitHub Desktop.
Facebook-Neural-Network
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from sklearn import ensemble, feature_extraction, preprocessing

# Load the pickled feature table. It must contain a `bidder_id` column,
# which is the join key used further down.
A = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk')
# A = A.join(train[['outcome']], on='bidder_id')
A.shape  # bare expression: only displays something in an interactive session

# Second pickled table, joined onto A by `bidder_id`.
# Presumably the (bidder_id, outcome) labels, per the commented-out line
# below -- confirm against whatever produced B.pk.
B = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/B.pk')
# B = train[['bidder_id', 'outcome']]

# Inner join: rows of A without a matching `bidder_id` in B are dropped.
A = A.merge(B, how='inner', on='bidder_id')
A.shape  # bare expression, see above

# Replace every missing value with 0.
A = A.fillna(0)
# Forest test 1
import numpy as np
import csv
import random
from sklearn.cross_validation import train_test_split
from sklearn import ensemble, feature_extraction, preprocessing
from sklearn.metrics import roc_auc_score

# --- Training matrices ------------------------------------------------------
# Target vector: the `outcome` column of the merged table A.
Y = A.outcome.values

# Feature matrix: every column of A except the target and the identifier.
X = np.array(A.drop(['outcome', 'bidder_id'], axis=1))

# Fit the standardisation (zero mean, unit variance per column) on the
# training features ONCE and reuse it for the prediction set below.
# The previous code called preprocessing.scale() separately on X and on C,
# so each matrix was centred/scaled with its own statistics and the model
# was applied to features on a different scale than it was trained on.
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)

#############################################################################
# Prediction set (original note: "retrieve outcome").
# Reload the same A.pk pickle, this time without the inner merge against B,
# so no row is filtered out.
C = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk')
# A = A.join(train[['outcome']], on='bidder_id')
C = C.fillna(0)

# Keep the identifiers aside so predictions can be matched back to bidders.
Id = C.bidder_id.values

# Same feature columns as X (assumes B.pk contributed only `outcome` besides
# the join key -- TODO confirm), transformed with the scaler fitted on X.
C = np.array(C.drop('bidder_id', axis=1))
C = scaler.transform(C)
##############################################################################
##############################################################################
from sknn.mlp import Classifier, Convolution, Layer
from sknn.mlp import Classifier, Layer
from sklearn import cross_validation

# Train the same network `n` times, keep the prediction column of every run
# whose AUC clears AUC_THRESHOLD, then average the kept columns and save the
# averaged vector.
n = 50  # number of independent training runs
AUC_THRESHOLD = 0.89  # a run is kept only if its AUC is strictly above this
mean_auc = 0.0
kept_predictions = []  # one 1-D array of predictions on C per kept run
m = 0  # number of runs kept so far

for i in range(n):
    clf = Classifier(
        layers=[
            # Layer("Rectifier", units=2),
            Layer("Maxout", units=500, pieces=2),
            Layer("Sigmoid", units=10),
            Layer("Softmax")],
        learning_rate=0.00001,
        n_iter=2000,
        # regularize='L1',
        valid_size=0.4,
        verbose=1,
        learning_rule='nesterov')
    clf.fit(X, Y)

    # NOTE(review): this AUC is computed on X, the same array that was passed
    # to fit(), so it is not a held-out estimate of model quality.
    Pr = clf.predict_proba(X)[:, 1]
    auc = roc_auc_score(Y, Pr)
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("%s %s %s" % (auc, i, m))

    if auc > AUC_THRESHOLD:
        print("On garde le model")
        m = m + 1
        Proba = clf.predict_proba(C)[:, 1]
        kept_predictions.append(Proba)

print("Model keep number  %s" % m)

# Without this guard an empty ensemble would only surface later as an
# obscure NumPy error.
if not kept_predictions:
    raise RuntimeError(
        "No model reached AUC > %s in %s runs; nothing to average or save."
        % (AUC_THRESHOLD, n))

# One column per kept run, one row per row of C.
Z = np.column_stack(kept_predictions)
print(Z.shape)

# Ensemble prediction: per-row mean over the kept runs.
NN_E = np.mean(Z, axis=1)

# Save the ensemble mean. The previous code saved `Proba`, i.e. only the
# last kept run, and never used NN_E.
# np.savez stores a positional array under the key 'arr_0'.
np.savez('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E_2.npz', NN_E)
#NN_E=np.load('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E.npz')
#NN_E=NN_E['arr_0']
#regularize: string, optional
#    Which regularization technique to use on the weights, for example ``L2`` (most
#    common) or ``L1`` (quite rare), as well as ``dropout``. By default, there's no
#    regularization, unless another parameter implies it should be enabled, e.g. if
#    ``weight_decay`` or ``dropout_rate`` are specified.
#learning_rule: str
#    Name of the learning rule used during stochastic gradient descent,
#    one of ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``adagrad`` or
#    ``rmsprop`` at the moment. The default is vanilla ``sgd``.
#valid_set: tuple of array-like
#    Validation set (X_v, y_v) to be used explicitly while training. Both
#    arrays should have the same size for the first dimension, and the second
#    dimension should match the training data specified in ``fit()``.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment