Skip to content

Instantly share code, notes, and snippets.

@savonarola
Created March 30, 2016 14:24
Show Gist options
  • Save savonarola/d3a5d24763c7d3361cb3f6ca64c3315d to your computer and use it in GitHub Desktop.
Save savonarola/d3a5d24763c7d3361cb3f6ca64c3315d to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from math import exp, pow, sqrt\n",
"from sklearn.metrics import roc_auc_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = pd.read_csv(\"data-logistic.csv\", header=None)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y = data[0].values\n",
"xs = data[[1,2]].values"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"l = xs.size"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def logistic_step(w, k, C):\n",
" w1, w2 = w\n",
" \n",
" sum_w1 = 0\n",
" sum_w2 = 0\n",
" for i in range(0, y.size):\n",
" yi = y[i]\n",
" xi1 = xs[i][0]\n",
" xi2 = xs[i][1]\n",
" coef = yi * ( 1 - 1 / ( 1 + exp( - yi*( w1*xi1 + w2*xi2 ))))\n",
" sum_w1 += xi1 * coef\n",
" sum_w2 += xi2 * coef\n",
" \n",
" w1_new = w1 + k*(1/l)*sum_w1 - k * C * w1\n",
" w2_new = w2 + k*(1/l)*sum_w2 - k * C * w2\n",
" \n",
" return (w1_new, w2_new)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def ediff(v, w):\n",
" v1, v2 = v\n",
" w1, w2 = w\n",
" return sqrt(pow(v1 - w1, 2) + pow(v2 - w2, 2))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"threshold = 1e-5"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def iterate(w0, k, C):\n",
" i = 0\n",
" w_old = w0\n",
" while True:\n",
" i += 1\n",
" w_new = logistic_step(w_old, k, C)\n",
" # print(\"i=%s, w: %s -> %s\" % (i, w_old, w_new)) \n",
" if ediff(w_old, w_new) < threshold:\n",
" break\n",
" w_old = w_new\n",
" return w_old"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def y_calc(w, x):\n",
" w1, w2 = w\n",
" if w1 * x[0] + w2 * x[1] > 0:\n",
" return 1\n",
" else:\n",
" return -1"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def score(w, x):\n",
" w1, w2 = w\n",
" return 1 / (1 + exp(-w1 * x[0] - w2 * x[1]))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"w_reg = iterate((0,0), 0.1, 10)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_calc_reg = np.array([ y_calc(w_reg, x) for x in xs])"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_score_reg = np.array([ score(w_reg, x) for x in xs])"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"w_no_reg = iterate((0,0), 0.1, 0)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_calc_no_reg = np.array([ y_calc(w_no_reg, x) for x in xs])"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_score_no_reg = np.array([ score(w_no_reg, x) for x in xs])"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.93666666666666654"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roc_auc_score(y, y_score_reg)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.92685714285714282"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roc_auc_score(y, y_score_no_reg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment