Skip to content

Instantly share code, notes, and snippets.

@spolakh
Created March 9, 2015 12:48
Show Gist options
  • Save spolakh/6c44d0b3257b82161b7f to your computer and use it in GitHub Desktop.
Save spolakh/6c44d0b3257b82161b7f to your computer and use it in GitHub Desktop.
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Neurolab classifier (via REP) on the toy mass dataset\n",
    "\n",
    "Trains a small feed-forward neurolab network through REP's `NeurolabClassifier` wrapper and evaluates it on a held-out half of the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "import neurolab as nl\n",
    "from sklearn.metrics import classification_report, roc_auc_score, zero_one_loss\n",
    "from sklearn.preprocessing import Imputer, MinMaxScaler, OneHotEncoder\n",
    "\n",
    "from rep.estimators import NeurolabClassifier\n",
    "from rep.utils import train_test_split\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load REP's toy mass dataset, as in the tutorial."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "sig_data = pd.read_csv('../toy_datasets/toyMC_sig_mass.csv', sep='\\t')\n",
    "bck_data = pd.read_csv('../toy_datasets/toyMC_bck_mass.csv', sep='\\t')\n",
    "\n",
    "# Signal rows get label 1 and background rows label 0, in the same order\n",
    "# in which the two frames are concatenated below.\n",
    "labels = np.array([1] * len(sig_data) + [0] * len(bck_data))\n",
    "data = pd.concat([sig_data, bck_data])\n",
    "variables = [\"FlightDistance\", \"FlightDistanceError\", \"IP\", \"VertexChi2\", \"pt\",\n",
    "             \"p0_pt\", \"p1_pt\", \"p2_pt\", 'LifeTime', 'dira']\n",
    "\n",
    "# Impute missing values, then min-max scale each feature, matching the\n",
    "# [0, 1] input range handed to `newff` further down.\n",
    "# NOTE(review): both transformers are fit on the full data set before the\n",
    "# train/test split, so test-set statistics leak into the preprocessing.\n",
    "data[variables] = MinMaxScaler().fit_transform(Imputer().fit_transform(data[variables].values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data[variables].values, labels, train_size=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "data": {"text/plain": "(72449, 10)"},
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Neurolab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "logsig = nl.trans.LogSig()\n",
    "n_features = X_train.shape[1]\n",
    "\n",
    "# One [min, max] pair per input feature.\n",
    "input_ranges = [[0, 1] for feature in range(n_features)]\n",
    "\n",
    "# Three layers of n_features, 300 and 2 neurons, each with a log-sigmoid transfer function.\n",
    "net = nl.net.newff(input_ranges, [n_features, 300, 2], [logsig, logsig, logsig])\n",
    "clf = NeurolabClassifier(net, epochs=10, show=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# One-hot encode the 0/1 training labels into a dense two-column array.\n",
    "y_train_column = y_train.reshape((len(y_train), 1))\n",
    "label_train = np.array(OneHotEncoder(n_values=2).fit_transform(y_train_column).todense())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "Epoch: 1; Error: 18112.25;\nEpoch: 2; Error: 10373.4673604;\nEpoch: 3; Error: 10082.8553514;\nEpoch: 4; Error: 26714.4420237;\nEpoch: 5; Error: 12031.5684802;\nEpoch: 6; Error: 9115.59963079;\nEpoch: 7; Error: 9096.51882764;\nEpoch: 8; Error: 8844.50092229;\nEpoch: 9; Error: 8876.05689853;\nEpoch: 10; Error: 8824.34268138;\nThe maximum number of train epochs is reached\nCPU times: user 2min 21s, sys: 168 ms, total: 2min 21s\nWall time: 2min 21s\n"
    }
   ],
   "source": [
    "%time clf.fit(X_train, label_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "predict_labels = clf.predict(X_test)\n",
    "predict_proba = clf.predict_proba(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "[ 1. 1. 1. ..., 1. 1. 1.]\n[[ 0.15272836 0.84727164]\n [ 0.15581779 0.84418221]\n [ 0.1488965 0.8511035 ]\n ..., \n [ 0.14621196 0.85378804]\n [ 0.15290298 0.84709702]\n [ 0.16100298 0.83899702]]\n"
    }
   ],
   "source": [
    "print predict_labels\n",
    "print predict_proba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "data": {"text/plain": "0.71007343705611281"},
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roc_auc_score(y_test, predict_proba[:, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "Error rate (zero-one loss): 0.13922705314\nClassification report:\n precision recall f1-score support\n\n 0 0.00 0.00 0.00 10087\n 1 0.86 1.00 0.93 62363\n\navg / total 0.74 0.86 0.80 72450\n\n"
    }
   ],
   "source": [
    "# zero_one_loss is the fraction of misclassified samples (1 - accuracy),\n",
    "# so it is reported as an error rate rather than as accuracy.\n",
    "print \"Error rate (zero-one loss):\", zero_one_loss(y_test, predict_labels)\n",
    "print \"Classification report:\"\n",
    "print classification_report(y_test, predict_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Reading the stored results.** The classification report shows recall 1.00 for class 1 and 0.00 for class 0, i.e. every test sample was predicted as class 1. The 0.139 zero-one loss is therefore just the share of class 0 in the test set (10087 of 72450); the ROC AUC of 0.71 above is computed from the predicted probabilities rather than from these hard labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": true, "trusted": true},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {"name": "ipython", "version": 2},
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment