alfard/FBNN.py

## FBNN.py
import pandas as pd
import numpy as np
from sklearn import ensemble, feature_extraction, preprocessing

# Load the per-bidder table (A.pk) and the second table (B.pk), keep only the
# bidders that appear in both (inner join on ``bidder_id``) and replace every
# missing value with 0.
# NOTE(review): B presumably holds the (bidder_id, outcome) pairs of the
# labelled training set -- confirm against the code that produced B.pk.
A = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk')
B = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/B.pk')

A = A.merge(B, how='inner', on='bidder_id').fillna(0)


#Forest test 1
import numpy as np
import csv
import random
from sklearn.cross_validation import train_test_split
from sklearn import ensemble, feature_extraction, preprocessing
from sklearn.metrics import roc_auc_score


# Target vector: the ``outcome`` column of the merged table.
Y = A['outcome'].values

# Feature matrix: every remaining column except the bidder identifier,
# converted to a plain array and standardised column by column.
X = preprocessing.scale(np.array(A.drop(['outcome', 'bidder_id'], axis=1)))

#############################################################################
# Build the matrix to score: every row of A.pk (not only the rows that were
# matched with B), with missing values replaced by 0.
C = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk').fillna(0)

# Keep the identifiers aside, in the same row order as C.
Id = C['bidder_id'].values

# NOTE(review): C is standardised with its own statistics, independently of X,
# so the models are fitted and applied on differently scaled features --
# confirm this is intended.
C = preprocessing.scale(np.array(C.drop('bidder_id', axis=1)))

##############################################################################


##############################################################################

from sknn.mlp import Classifier, Convolution, Layer

from sknn.mlp import Classifier, Layer
from sklearn import cross_validation


# Train `n` networks on (X, Y), keep the runs whose AUC exceeds
# `auc_threshold`, and average the kept runs' class-1 probabilities over C
# into the ensemble prediction NN_E, which is then saved to disk.
#
# NOTE: the AUC is computed on the very data the model was fitted on (X, Y),
# not on a held-out fold, so it is an optimistic selection criterion.
n = 50  # number of training runs
auc_threshold = 0.89  # a run joins the ensemble only above this AUC

mean_auc = 0.0  # not used by the visible script; kept for compatibility

kept_probas = []  # one vector of class-1 probabilities over C per kept run

m = 0  # number of runs kept so far

for i in range(n):

    # A fresh classifier is built on every pass so each run starts from
    # scratch instead of continuing the previous one.
    clf = Classifier(
        layers=[
            # Layer("Rectifier", units=2),
            Layer("Maxout", units=500, pieces=2),
            Layer("Sigmoid", units=10),
            Layer("Softmax")],
        learning_rate=0.00001,
        n_iter=2000,
        # regularize='L1',
        valid_size=0.4,
        verbose=1,
        learning_rule='nesterov')

    clf.fit(X, Y)

    Pr = clf.predict_proba(X)[:, 1]
    auc = roc_auc_score(Y, Pr)
    # Single-argument print: same output under Python 2, valid under Python 3.
    print("%s %s %s" % (auc, i, m))

    if auc > auc_threshold:
        print("On garde le model")
        m = m + 1
        Proba = clf.predict_proba(C)[:, 1]
        kept_probas.append(Proba)


print("%s %s" % ("Model keep number ", m))

# Without this guard an empty ensemble would only surface as an obscure
# numpy error; fail with an explicit message instead.
if not kept_probas:
    raise RuntimeError(
        "No model exceeded the AUC threshold of %s in %s runs; "
        "nothing to average or save." % (auc_threshold, n))

# One column per kept run, one row per row of C.
Z = np.column_stack(kept_probas)

print(Z.shape)

NN_E = np.mean(Z, axis=1)

# Save the ensemble average (previously only the last kept run's `Proba` was
# written, discarding NN_E). Stored under the default key ``arr_0``.
np.savez('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E_2.npz', NN_E)


#NN_E=np.load('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E.npz')
#NN_E=NN_E['arr_0']


#regularize: string, optional
#        Which regularization technique to use on the weights, for example ``L2`` (most
#        common) or ``L1`` (quite rare), as well as ``dropout``.  By default, there's no
#        regularization, unless another parameter implies it should be enabled, e.g. if
#        ``weight_decay`` or ``dropout_rate`` are specified.


#learning_rule: str
#Name of the learning rule used during stochastic gradient descent,
#one of ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``adagrad`` or
#``rmsprop`` at the moment.  The default is vanilla ``sgd``

#valid_set: tuple of array-like
#        Validation set (X_v, y_v) to be used explicitly while training.  Both
#        arrays should have the same size for the first dimension, and the second
#        dimension should match with the training data specified in ``fit()``
	import pandas as pd
	import numpy as np
	from sklearn import ensemble, feature_extraction, preprocessing

	# Load the per-bidder table (A.pk) and the second table (B.pk), keep only
	# the bidders that appear in both (inner join on ``bidder_id``) and replace
	# every missing value with 0.
	# NOTE(review): B presumably holds the (bidder_id, outcome) pairs of the
	# labelled training set -- confirm against the code that produced B.pk.
	A = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk')
	B = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/B.pk')

	A = A.merge(B, how='inner', on='bidder_id').fillna(0)


	#Forest test 1
	import numpy as np
	import csv
	import random
	from sklearn.cross_validation import train_test_split
	from sklearn import ensemble, feature_extraction, preprocessing
	from sklearn.metrics import roc_auc_score


	# Target vector: the ``outcome`` column of the merged table.
	Y = A['outcome'].values

	# Feature matrix: every remaining column except the bidder identifier,
	# converted to a plain array and standardised column by column.
	X = preprocessing.scale(np.array(A.drop(['outcome', 'bidder_id'], axis=1)))

	#############################################################################
	# Build the matrix to score: every row of A.pk (not only the rows that
	# were matched with B), with missing values replaced by 0.
	C = pd.read_pickle('/home/alfard/Documents/Kaggle/Facebook-Robot/A.pk').fillna(0)

	# Keep the identifiers aside, in the same row order as C.
	Id = C['bidder_id'].values

	# NOTE(review): C is standardised with its own statistics, independently
	# of X, so the models are fitted and applied on differently scaled
	# features -- confirm this is intended.
	C = preprocessing.scale(np.array(C.drop('bidder_id', axis=1)))

	##############################################################################


	##############################################################################

	from sknn.mlp import Classifier, Convolution, Layer

	from sknn.mlp import Classifier, Layer
	from sklearn import cross_validation


	# Train `n` networks on (X, Y), keep the runs whose AUC exceeds
	# `auc_threshold`, and average the kept runs' class-1 probabilities over C
	# into the ensemble prediction NN_E, which is then saved to disk.
	#
	# NOTE: the AUC is computed on the very data the model was fitted on
	# (X, Y), not on a held-out fold, so it is an optimistic selection
	# criterion.
	n = 50  # number of training runs
	auc_threshold = 0.89  # a run joins the ensemble only above this AUC

	mean_auc = 0.0  # not used by the visible script; kept for compatibility

	kept_probas = []  # one vector of class-1 probabilities over C per kept run

	m = 0  # number of runs kept so far

	for i in range(n):

		# A fresh classifier is built on every pass so each run starts from
		# scratch instead of continuing the previous one.
		clf = Classifier(
			layers=[
				# Layer("Rectifier", units=2),
				Layer("Maxout", units=500, pieces=2),
				Layer("Sigmoid", units=10),
				Layer("Softmax")],
			learning_rate=0.00001,
			n_iter=2000,
			# regularize='L1',
			valid_size=0.4,
			verbose=1,
			learning_rule='nesterov')

		clf.fit(X, Y)

		Pr = clf.predict_proba(X)[:, 1]
		auc = roc_auc_score(Y, Pr)
		# Single-argument print: same output under Python 2, valid under
		# Python 3.
		print("%s %s %s" % (auc, i, m))

		if auc > auc_threshold:
			print("On garde le model")
			m = m + 1
			Proba = clf.predict_proba(C)[:, 1]
			kept_probas.append(Proba)


	print("%s %s" % ("Model keep number ", m))

	# Without this guard an empty ensemble would only surface as an obscure
	# numpy error; fail with an explicit message instead.
	if not kept_probas:
		raise RuntimeError(
			"No model exceeded the AUC threshold of %s in %s runs; "
			"nothing to average or save." % (auc_threshold, n))

	# One column per kept run, one row per row of C.
	Z = np.column_stack(kept_probas)

	print(Z.shape)

	NN_E = np.mean(Z, axis=1)

	# Save the ensemble average (previously only the last kept run's `Proba`
	# was written, discarding NN_E). Stored under the default key ``arr_0``.
	np.savez('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E_2.npz', NN_E)



	#NN_E=np.load('/home/alfard/Documents/Kaggle/Facebook-Robot/NN_E.npz')
	#NN_E=NN_E['arr_0']


	#regularize: string, optional
	# Which regularization technique to use on the weights, for example ``L2`` (most
	# common) or ``L1`` (quite rare), as well as ``dropout``. By default, there's no
	# regularization, unless another parameter implies it should be enabled, e.g. if
	# ``weight_decay`` or ``dropout_rate`` are specified.


	#learning_rule: str
	#Name of the learning rule used during stochastic gradient descent,
	#one of ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``adagrad`` or
	#``rmsprop`` at the moment. The default is vanilla ``sgd``

	#valid_set: tuple of array-like
	# Validation set (X_v, y_v) to be used explicitly while training. Both
	# arrays should have the same size for the first dimension, and the second
	# dimension should match with the training data specified in ``fit()``