Created
March 9, 2015 12:48
-
-
Save spolakh/6c44d0b3257b82161b7f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# All imports live in this one cell so the notebook's dependencies are visible up front.\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "import neurolab as nl\n",
    "from sklearn.metrics import classification_report, roc_auc_score, zero_one_loss\n",
    "from sklearn.preprocessing import Imputer, MinMaxScaler, OneHotEncoder\n",
    "\n",
    "from rep.estimators import NeurolabClassifier\n",
    "from rep.utils import train_test_split\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load the REP toy mass dataset, as in the tutorial."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "sig_data = pd.read_csv('../toy_datasets/toyMC_sig_mass.csv', sep='\\t')\n",
    "bck_data = pd.read_csv('../toy_datasets/toyMC_bck_mass.csv', sep='\\t')\n",
    "\n",
    "# Signal rows are labelled 1 and background rows 0, in the same order as the concatenation below.\n",
    "labels = np.array([1] * len(sig_data) + [0] * len(bck_data))\n",
    "data = pd.concat([sig_data, bck_data])\n",
    "variables = [\"FlightDistance\", \"FlightDistanceError\", \"IP\", \"VertexChi2\", \"pt\", \"p0_pt\", \"p1_pt\", \"p2_pt\", 'LifeTime','dira']\n",
    "\n",
    "# Fill missing values with Imputer's default strategy, then rescale every feature to [0, 1].\n",
    "data[variables] = MinMaxScaler().fit_transform(Imputer().fit_transform(data[variables].values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data[variables].values, labels, train_size=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(72449, 10)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Neurolab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# Every feature was min-max scaled above, so each network input ranges over [0, 1].\n",
    "n_features = X_train.shape[1]\n",
    "input_ranges = [[0, 1] for column in range(n_features)]\n",
    "\n",
    "# Three layers (n_features, 300 and 2 units), all with the logistic sigmoid transfer function.\n",
    "sigmoid = nl.trans.LogSig()\n",
    "net = nl.net.newff(input_ranges, [n_features, 300, 2], [sigmoid, sigmoid, sigmoid])\n",
    "clf = NeurolabClassifier(net, epochs=10, show=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# The network has two output units, so encode the binary labels as two one-hot columns.\n",
    "y = y_train.reshape((len(y_train), 1))\n",
    "label_train = np.array(OneHotEncoder(n_values=2).fit_transform(y).todense())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 1; Error: 18112.25;\n",
      "Epoch: 2; Error: 10373.4673604;\n",
      "Epoch: 3; Error: 10082.8553514;\n",
      "Epoch: 4; Error: 26714.4420237;\n",
      "Epoch: 5; Error: 12031.5684802;\n",
      "Epoch: 6; Error: 9115.59963079;\n",
      "Epoch: 7; Error: 9096.51882764;\n",
      "Epoch: 8; Error: 8844.50092229;\n",
      "Epoch: 9; Error: 8876.05689853;\n",
      "Epoch: 10; Error: 8824.34268138;\n",
      "The maximum number of train epochs is reached\n",
      "CPU times: user 2min 21s, sys: 168 ms, total: 2min 21s\n",
      "Wall time: 2min 21s\n"
     ]
    }
   ],
   "source": [
    "%time clf.fit(X_train, label_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "predict_labels = clf.predict(X_test)\n",
    "predict_proba = clf.predict_proba(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1. 1. 1. ..., 1. 1. 1.]\n",
      "[[ 0.15272836 0.84727164]\n",
      " [ 0.15581779 0.84418221]\n",
      " [ 0.1488965 0.8511035 ]\n",
      " ..., \n",
      " [ 0.14621196 0.85378804]\n",
      " [ 0.15290298 0.84709702]\n",
      " [ 0.16100298 0.83899702]]\n"
     ]
    }
   ],
   "source": [
    "print predict_labels\n",
    "print predict_proba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.71007343705611281"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column 1 of predict_proba is the score for class 1 (signal).\n",
    "roc_auc_score(y_test, predict_proba[:, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zero-one loss (error rate): 0.13922705314\n",
      "Classification report:\n",
      " precision recall f1-score support\n",
      "\n",
      " 0 0.00 0.00 0.00 10087\n",
      " 1 0.86 1.00 0.93 62363\n",
      "\n",
      "avg / total 0.74 0.86 0.80 72450\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# zero_one_loss is the fraction of misclassified events, i.e. the error rate, not the accuracy.\n",
    "print \"Zero-one loss (error rate):\", zero_one_loss(y_test, predict_labels)\n",
    "print \"Classification report:\"\n",
    "print classification_report(y_test, predict_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note:** after 10 epochs the classifier assigns class 1 to essentially every test event (precision and recall are 0.00 for class 0), so the zero-one loss of 0.139 is just the background fraction of the test sample, 10087 / 72450. The ROC AUC of 0.71 is computed from `predict_proba`, not from the hard labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": true, "trusted": true},
   "outputs": [],
   "source": []
  }
 ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment