Created
March 19, 2015 15:47
-
-
Save spolakh/d1e7cc4e84752d5665f1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n%matplotlib inline\n\n# Seed NumPy's global RNG so that Restart & Run All repeats the same run.\n# NOTE(review): assumes the train/test split and the network initialisation draw from np.random -- confirm.\nSEED = 42\nnp.random.seed(SEED)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "from rep.utils import train_test_split\nfrom sklearn.metrics import roc_auc_score\n\nsig_data = pd.read_csv('../toy_datasets/toyMC_sig_mass.csv', sep='\\t')\nbck_data = pd.read_csv('../toy_datasets/toyMC_bck_mass.csv', sep='\\t')\n\n# Signal rows are labelled 1 and background rows 0, in the same order as the concatenated frame below.\nlabels = np.array([1] * len(sig_data) + [0] * len(bck_data))\ndata = pd.concat([sig_data, bck_data])\n# Columns handed to the classifier through its `features` argument.\nvariables = [\"FlightDistance\", \"FlightDistanceError\", \"IP\", \"VertexChi2\", \"pt\", \"p0_pt\", \"p1_pt\", \"p2_pt\", 'LifeTime','dira']", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "X_train, X_test, y_train, y_test = train_test_split(data, labels, train_size=0.5)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "X_train.shape", "outputs": [{"execution_count": 4, "output_type": "execute_result", "data": {"text/plain": "(72449, 40)"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Neurolab", "cell_type": "markdown", "metadata": {}}, {"source": "\u0422\u0438\u043f \u0441\u0435\u0442\u0438 \u0437\u0430\u0434\u0430\u0451\u0442\u0441\u044f \u043e\u043f\u0446\u0438\u043e\u043d\u0430\u043b\u044c\u043d\u044b\u043c \u043f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u043e\u043c net_type. 
fit \u0438 predict \u0440\u0430\u0431\u043e\u0442\u0430\u044e\u0442 \u0432 \u043f\u043e\u043b\u043d\u043e\u043c \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0438 \u0441 sklearn'\u043e\u0432\u0441\u043a\u043e\u0439 \u0441\u043f\u0435\u0446\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u0435\u0439, \u043d\u0438\u043a\u0430\u043a\u0438\u0445 \u043f\u0440\u0435\u0434\u0432\u0430\u0440\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0445 \u043f\u0440\u0435\u043e\u0431\u0440\u0430\u0437\u043e\u0432\u0430\u043d\u0438\u0439 \u0432\u0445\u043e\u0434\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u044c \u043d\u0435 \u043d\u0443\u0436\u043d\u043e. \u0422\u0430\u043a\u0436\u0435, \u0432 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0438 \u0441 \u0438\u0434\u0435\u043e\u043b\u043e\u0433\u0438\u0435\u0439 REP, \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0435\u0442\u0441\u044f \u0430\u0440\u0433\u0443\u043c\u0435\u043d\u0442 features, \u043e\u0442\u0432\u0435\u0447\u0430\u044e\u0449\u0438\u0439 \u0437\u0430 \u0442\u043e, \u043f\u043e \u043a\u0430\u043a\u0438\u043c \u043f\u0440\u0438\u0437\u043d\u0430\u043a\u0430\u043c \u0438\u0434\u0451\u0442 \u043f\u043e\u0441\u0442\u0440\u043e\u0435\u043d\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438.", "cell_type": "markdown", "metadata": {}}, {"execution_count": 12, "cell_type": "code", "source": "import neurolab as nl\nfrom rep.estimators import NeurolabClassifier\n\n# The same LogSig transfer-function object is passed three times in `transf`.\n# NOTE(review): show=1 presumably prints the training error after every epoch -- confirm against NeurolabClassifier.\nlog_sigmoid = nl.trans.LogSig()\nclf = NeurolabClassifier(features=variables, show=1, transf=[log_sigmoid, log_sigmoid, log_sigmoid])", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 19, "cell_type": "code", "source": "%time clf.fit(X_train, y_train)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Epoch: 1; Error: 18112.25;\nEpoch: 2; Error: 10252.4735735;\nEpoch: 3; Error: 9965.43621617;\nEpoch: 4; Error: 
26737.2827356;\nEpoch: 5; Error: 12012.6412675;\nEpoch: 6; Error: 9006.56659507;\nEpoch: 7; Error: 9025.80858723;\nEpoch: 8; Error: 8748.69865159;\nEpoch: 9; Error: 8790.338461;\nEpoch: 10; Error: 8730.6051094;\nThe maximum number of train epochs is reached\nCPU times: user 2min 21s, sys: 208 ms, total: 2min 21s\nWall time: 2min 21s\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 16, "cell_type": "code", "source": "predict_labels = clf.predict(X_test)\npredict_proba = clf.predict_proba(X_test)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 17, "cell_type": "code", "source": "# Single-argument print(...) gives the same text under Python 2 and also parses under Python 3.\nprint(predict_labels)\nprint(predict_proba)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "[1 1 1 ..., 1 1 1]\n[[ 0.14969219 0.85030781]\n [ 0.16340585 0.83659415]\n [ 0.14998982 0.85001018]\n ..., \n [ 0.14882971 0.85117029]\n [ 0.14423919 0.85576081]\n [ 0.14883047 0.85116953]]\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 18, "cell_type": "code", "source": "# roc_auc_score is imported in the data-loading cell near the top of the notebook.\n# Column 1 of predict_proba is used as the score for label 1.\nroc_auc_score(y_test, predict_proba[:, 1])", "outputs": [{"execution_count": 18, "output_type": "execute_result", "data": {"text/plain": "0.71249081962693095"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "\u041f\u0440\u043e\u0432\u0435\u0440\u0438\u043c, \u0447\u0442\u043e set_params \u0440\u0430\u0431\u043e\u0442\u0430\u0435\u0442", "cell_type": "markdown", "metadata": {}}, {"execution_count": 31, "cell_type": "code", "source": "clf.set_params(epochs=50, show=0)\n%time clf.fit(X_train, y_train)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "CPU times: user 12min, sys: 933 ms, total: 12min 1s\nWall time: 12min 1s\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 23, "cell_type": "code", "source": "predict_proba = clf.predict_proba(X_test)", "outputs": [], "metadata": {"collapsed": true, 
"trusted": true}}, {"execution_count": 24, "cell_type": "code", "source": "roc_auc_score(y_test, predict_proba[:, 1])", "outputs": [{"execution_count": 24, "output_type": "execute_result", "data": {"text/plain": "0.82945868098368991"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 30, "cell_type": "code", "source": "from sklearn.metrics import classification_report, zero_one_loss\n\n# zero_one_loss is the fraction of misclassified samples (an error rate), not the accuracy.\n# NOTE(review): predict_labels is assigned in an earlier cell, above the set_params refit;\n# re-run clf.predict(X_test) first if the refit model is the one to be scored here.\nprint(\"Zero-one loss (error rate): %s\" % zero_one_loss(y_test, predict_labels))\nprint(\"Classification report:\")\nprint(classification_report(y_test, predict_labels))", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Zero-one loss (error rate): 0.138881987578\nClassification report:\n precision recall f1-score support\n\n 0 0.88 0.02 0.03 10208\n 1 0.86 1.00 0.93 62242\n\navg / total 0.86 0.86 0.80 72450\n\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "\u041f\u0440\u043e\u0432\u0435\u0440\u0438\u043c, \u0447\u0442\u043e \u0441\u0435\u0442\u044c \u043d\u043e\u0440\u043c\u0430\u043b\u044c\u043d\u043e (\u0434\u0435-)\u0441\u0435\u0440\u0438\u0430\u043b\u0438\u0437\u0443\u0435\u0442\u0441\u044f \u043d\u0430 \u0434\u0438\u0441\u043a", "cell_type": "markdown", "metadata": {}}, {"execution_count": 27, "cell_type": "code", "source": "import pickle\n\n# Context managers close (and flush) each handle, so the dump is fully written before it is read back.\nwith open(\"dump.p\", \"wb\") as dump_file:\n    pickle.dump(clf, dump_file)\nwith open(\"dump.p\", \"rb\") as dump_file:\n    clf_loaded = pickle.load(dump_file)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 29, "cell_type": "code", "source": "predict_proba = clf_loaded.predict_proba(X_test)\nroc_auc_score(y_test, predict_proba[:, 1])", "outputs": [{"execution_count": 29, "output_type": "execute_result", "data": {"text/plain": "0.82945868098368991"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": 
"python", "version": "2.7.6", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment