Skip to content

Instantly share code, notes, and snippets.

@kiwamizamurai
Last active July 27, 2019 10:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kiwamizamurai/1dd1cab37605843c4dad8a419303d63a to your computer and use it in GitHub Desktop.
Save kiwamizamurai/1dd1cab37605843c4dad8a419303d63a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
},
"colab": {
"name": "LOGISTIC_01.ipynb",
"version": "0.3.2",
"provenance": [],
"include_colab_link": true
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kiwamizamurai/f464781cdd914daf2dad38e70337d78c/logistic_01.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "W9AuWEgf5wKh",
"colab_type": "text"
},
"source": [
"# Logistic regression\n",
"\n",
"#### Data\n",
"UCI Occupancy Detection data set: https://archive.ics.uci.edu/ml/datasets/Occupancy+Detection+"
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true,
"id": "Pkgl8OV15wKj",
"colab_type": "code",
"colab": {}
},
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.optimize as op"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "wdQJXkGO5wKp",
"colab_type": "code",
"outputId": "34886db2-e8e1-49ca-a2ce-10789340caf3",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 166
}
},
"source": [
"# List the working directory to check that the three data files are present\n",
"!ls"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"datatest2.txt datatest.txt datatraining.txt sample_data\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true,
"id": "53xSoVan5wKv",
"colab_type": "code",
"colab": {}
},
"source": [
"# dataset1 <- datatest.txt, dataset2 <- datatraining.txt, dataset3 <- datatest2.txt\n",
"dataset1, dataset2, dataset3 = [\n",
"    pd.read_csv(path)\n",
"    for path in (\"datatest.txt\", \"datatraining.txt\", \"datatest2.txt\")\n",
"]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sT1oQ_g26niH",
"colab_type": "code",
"outputId": "91b4c84a-cadd-453e-fbef-b6272cc892b5",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 241
}
},
"source": [
"# Preview the first rows of dataset1 (read from datatest.txt)\n",
"dataset1.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>2015-02-02 14:19:00</td>\n",
" <td>23.7000</td>\n",
" <td>26.272</td>\n",
" <td>585.200000</td>\n",
" <td>749.200000</td>\n",
" <td>0.004764</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>141</th>\n",
" <td>2015-02-02 14:19:59</td>\n",
" <td>23.7180</td>\n",
" <td>26.290</td>\n",
" <td>578.400000</td>\n",
" <td>760.400000</td>\n",
" <td>0.004773</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>142</th>\n",
" <td>2015-02-02 14:21:00</td>\n",
" <td>23.7300</td>\n",
" <td>26.230</td>\n",
" <td>572.666667</td>\n",
" <td>769.666667</td>\n",
" <td>0.004765</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>2015-02-02 14:22:00</td>\n",
" <td>23.7225</td>\n",
" <td>26.125</td>\n",
" <td>493.750000</td>\n",
" <td>774.750000</td>\n",
" <td>0.004744</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>2015-02-02 14:23:00</td>\n",
" <td>23.7540</td>\n",
" <td>26.200</td>\n",
" <td>488.600000</td>\n",
" <td>779.000000</td>\n",
" <td>0.004767</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Temperature ... HumidityRatio Occupancy\n",
"140 2015-02-02 14:19:00 23.7000 ... 0.004764 1\n",
"141 2015-02-02 14:19:59 23.7180 ... 0.004773 1\n",
"142 2015-02-02 14:21:00 23.7300 ... 0.004765 1\n",
"143 2015-02-02 14:22:00 23.7225 ... 0.004744 1\n",
"144 2015-02-02 14:23:00 23.7540 ... 0.004767 1\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 62
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "DOrcc2786qAl",
"colab_type": "code",
"outputId": "a98b0923-9f7c-4345-af45-4054d394098f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 241
}
},
"source": [
"# Preview the first rows of dataset2 (read from datatraining.txt)\n",
"dataset2.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-02-04 17:51:00</td>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.25</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-02-04 17:51:59</td>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.00</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-02-04 17:53:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.50</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-02-04 17:54:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.25</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2015-02-04 17:55:00</td>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.50</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Temperature Humidity ... CO2 HumidityRatio Occupancy\n",
"1 2015-02-04 17:51:00 23.18 27.2720 ... 721.25 0.004793 1\n",
"2 2015-02-04 17:51:59 23.15 27.2675 ... 714.00 0.004783 1\n",
"3 2015-02-04 17:53:00 23.15 27.2450 ... 713.50 0.004779 1\n",
"4 2015-02-04 17:54:00 23.15 27.2000 ... 708.25 0.004772 1\n",
"5 2015-02-04 17:55:00 23.10 27.2000 ... 704.50 0.004757 1\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 63
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "FxPSP0Py6sFf",
"colab_type": "code",
"outputId": "c08e09a9-6448-4db9-dfe2-a423d7b50d17",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 241
}
},
"source": [
"# Preview the first rows of dataset3 (read from datatest2.txt)\n",
"dataset3.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-02-11 14:48:00</td>\n",
" <td>21.7600</td>\n",
" <td>31.133333</td>\n",
" <td>437.333333</td>\n",
" <td>1029.666667</td>\n",
" <td>0.005021</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-02-11 14:49:00</td>\n",
" <td>21.7900</td>\n",
" <td>31.000000</td>\n",
" <td>437.333333</td>\n",
" <td>1000.000000</td>\n",
" <td>0.005009</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-02-11 14:50:00</td>\n",
" <td>21.7675</td>\n",
" <td>31.122500</td>\n",
" <td>434.000000</td>\n",
" <td>1003.750000</td>\n",
" <td>0.005022</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-02-11 14:51:00</td>\n",
" <td>21.7675</td>\n",
" <td>31.122500</td>\n",
" <td>439.000000</td>\n",
" <td>1009.500000</td>\n",
" <td>0.005022</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2015-02-11 14:51:59</td>\n",
" <td>21.7900</td>\n",
" <td>31.133333</td>\n",
" <td>437.333333</td>\n",
" <td>1005.666667</td>\n",
" <td>0.005030</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Temperature ... HumidityRatio Occupancy\n",
"1 2015-02-11 14:48:00 21.7600 ... 0.005021 1\n",
"2 2015-02-11 14:49:00 21.7900 ... 0.005009 1\n",
"3 2015-02-11 14:50:00 21.7675 ... 0.005022 1\n",
"4 2015-02-11 14:51:00 21.7675 ... 0.005022 1\n",
"5 2015-02-11 14:51:59 21.7900 ... 0.005030 1\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 64
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "KmCCBhYQ5wK8",
"colab_type": "code",
"colab": {}
},
"source": [
"class Logisticr():\n",
"    \"\"\"Binary logistic regression trained by batch gradient descent or SciPy's TNC solver.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X : (m, n) matrix or array of features. No intercept is added here, so the\n",
"        caller supplies the leading column of ones.\n",
"    y : (m, 1) matrix or array of 0/1 labels.\n",
"    alg : 0 runs batch gradient descent; any other value runs\n",
"        scipy.optimize.minimize(method='TNC'), which needs\n",
"        `import scipy.optimize as op` to have been executed.\n",
"    eta : gradient-descent step size (unused by the TNC branch).\n",
"    w : (n, 1) initial weights.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, X, y, alg, eta, w):\n",
"        self.X = X\n",
"        self.y = y\n",
"        self.alg = alg\n",
"        self.eta = eta\n",
"        self.w = w\n",
"        self.m, self.n = np.shape(X)\n",
"        # Cost history, filled by graddescent(); created here so lplot() also\n",
"        # works when the TNC branch was used.\n",
"        self.J = []\n",
"        self.epoch = []\n",
"\n",
"    def sigFunc(self, z):\n",
"        \"\"\"Return the logistic sigmoid 1 / (1 + exp(-z)) element-wise as an ndarray.\"\"\"\n",
"        z = np.asarray(z, dtype=float)\n",
"        # exp(-|z|) is always <= 1, so np.exp cannot overflow for large |z|.\n",
"        e = np.exp(-np.abs(z))\n",
"        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))\n",
"\n",
"    def decide(self, x):\n",
"        \"\"\"Map probabilities to class labels: 1 where x >= 0.5, otherwise 0.\"\"\"\n",
"        return np.where(np.asarray(x) >= 0.5, 1, 0)\n",
"\n",
"    def predict(self, X):\n",
"        \"\"\"Return the (m, 1) array of 0/1 labels predicted for X with the current weights.\"\"\"\n",
"        X = np.asarray(X, dtype=float)\n",
"        w = np.asarray(self.w, dtype=float).reshape(-1, 1)\n",
"        return self.decide(self.sigFunc(X.dot(w)))\n",
"\n",
"    def costfunc(self, w, X, y):\n",
"        \"\"\"Return (J, grad): the mean cross-entropy cost and its (n, 1) gradient at w.\n",
"\n",
"        w may be flat (n,) or a column (n, 1); X is (m, n) and y is (m, 1).\n",
"        \"\"\"\n",
"        X = np.asarray(X, dtype=float)\n",
"        y = np.asarray(y, dtype=float).reshape(-1, 1)\n",
"        w = np.asarray(w, dtype=float).reshape(-1, 1)\n",
"        m = X.shape[0]\n",
"\n",
"        phi = self.sigFunc(X.dot(w))\n",
"\n",
"        # Keep phi away from exactly 0 and 1 so np.log never receives 0 (the source\n",
"        # of \"divide by zero encountered in log\" and NaN costs).\n",
"        eps = np.finfo(float).eps\n",
"        phi_safe = np.clip(phi, eps, 1.0 - eps)\n",
"        J = -(y * np.log(phi_safe) + (1.0 - y) * np.log(1.0 - phi_safe)).sum() / m\n",
"\n",
"        # The gradient uses the unclipped probabilities.\n",
"        grad = X.T.dot(phi - y) / m\n",
"\n",
"        return J, grad\n",
"\n",
"    def graddescent(self, maxiter):\n",
"        \"\"\"Run maxiter batch gradient-descent steps, record the cost per step, return w.\"\"\"\n",
"        self.J = []\n",
"        self.epoch = []\n",
"        w = np.asarray(self.w, dtype=float).reshape(-1, 1)\n",
"        for i in range(maxiter):\n",
"            J, grad = self.costfunc(w, self.X, self.y)\n",
"            self.J.append(J)\n",
"            self.epoch.append(i)\n",
"            w = w - self.eta * grad\n",
"        self.w = w\n",
"        return self.w\n",
"\n",
"    def fit(self, maxiter=100000):\n",
"        \"\"\"Fit the weights, print them with the training accuracy, and store both.\n",
"\n",
"        maxiter is the number of gradient-descent steps (alg == 0 only). The\n",
"        fitted weights are left in self.w as an (n, 1) array and the training\n",
"        accuracy in self.accuracy.\n",
"        \"\"\"\n",
"        if self.alg == 0:\n",
"            self.w = self.graddescent(maxiter)\n",
"        else:\n",
"            # scipy.optimize.minimize works on 1-D vectors, so flatten both the\n",
"            # start point and the gradient returned by costfunc.\n",
"            def _objective(w_flat):\n",
"                J, grad = self.costfunc(w_flat, self.X, self.y)\n",
"                return J, np.asarray(grad, dtype=float).ravel()\n",
"\n",
"            result = op.minimize(fun=_objective,\n",
"                                 x0=np.asarray(self.w, dtype=float).ravel(),\n",
"                                 method='TNC',\n",
"                                 jac=True)\n",
"            self.w = np.asarray(result.x, dtype=float).reshape(-1, 1)\n",
"\n",
"        print(\"Found Solution:\")\n",
"        print(self.w)\n",
"\n",
"        labels = np.asarray(self.y).reshape(-1, 1)\n",
"        accuracy = float(np.mean(self.predict(self.X) == labels))\n",
"        self.accuracy = accuracy\n",
"\n",
"        print(\"Train Accuracy: %f\" % accuracy)\n",
"\n",
"    def plot(self):\n",
"        \"\"\"Scatter both classes and draw the decision boundary (only when n == 3).\"\"\"\n",
"        if self.n != 3:\n",
"            return\n",
"\n",
"        X = np.asarray(self.X, dtype=float)\n",
"        labels = np.asarray(self.y).ravel()\n",
"        w = np.asarray(self.w, dtype=float).ravel()\n",
"\n",
"        # Boolean row masks, so only rows of the matching class are selected.\n",
"        pos = labels == 1\n",
"        neg = labels == 0\n",
"\n",
"        x1_1 = X[:, 1].min()\n",
"        x1_2 = X[:, 1].max()\n",
"\n",
"        # Boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2 at both ends of x1.\n",
"        x2_1 = -(w[0] + w[1] * x1_1) / w[2]\n",
"        x2_2 = -(w[0] + w[1] * x1_2) / w[2]\n",
"\n",
"        plt.plot(X[pos, 1], X[pos, 2], \"bo\", markersize=3)\n",
"        plt.plot(X[neg, 1], X[neg, 2], \"ro\", markersize=3)\n",
"\n",
"        plt.plot([x1_1, x1_2], [x2_1, x2_2], \"g-\")\n",
"\n",
"        plt.xlabel(\"Feature 1\")\n",
"        plt.ylabel(\"Feature 2\")\n",
"        plt.title(\"Decision boundary\")\n",
"        plt.show()\n",
"\n",
"    def lplot(self):\n",
"        \"\"\"Plot the cost recorded by graddescent() against the iteration number.\"\"\"\n",
"        plt.plot(self.epoch, self.J)\n",
"        plt.xlabel(\"Iteration\")\n",
"        plt.ylabel(\"Cost J\")\n",
"        plt.title(\"Gradient descent cost\")\n",
"        plt.show()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9Jem6tRb5wLA",
"colab_type": "code",
"outputId": "72943bbf-3a08-4d9d-f641-f51a6621eec9",
"colab": {}
},
"source": [
"# Features: columns 1-2 (Temperature, Humidity); target: column 6 (Occupancy)\n",
"_X = dataset1.iloc[:, 1:3]\n",
"_y = dataset1.iloc[:, 6]\n",
"\n",
"_m = _X.shape[0]\n",
"\n",
"# prepend the intercept column of ones\n",
"_X = np.hstack((np.matrix(np.ones((_m, 1))), _X))\n",
"_y = np.matrix(_y).reshape(_y.shape[0], 1)\n",
"_n = _X.shape[1]\n",
"\n",
"_alg = 0      # 0 selects batch gradient descent in Logisticr.fit\n",
"_eta = 0.001  # gradient-descent step size\n",
"_w = np.matrix(np.zeros((_n, 1)))  # start from all-zero weights\n",
"\n",
"# build the model and train it\n",
"lr = Logisticr(_X, _y, _alg, _eta, _w)\n",
"lr.fit()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Number of Features: 3\n",
"Found Solution:\n",
"[[-1.39917176]\n",
" [-0.464796 ]\n",
" [ 0.42665554]]\n",
"Train Accuracy: 0.703189\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "M0qC6Dqz5wLB",
"colab_type": "code",
"outputId": "7c2ee458-ef8b-439b-8fd2-47eacd136cbc",
"colab": {}
},
"source": [
"# Plot the cost stored for each gradient-descent iteration of the fit above\n",
"lr.lplot()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x1149ab310>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4iRBcNfa5wLF",
"colab_type": "code",
"outputId": "bfc2612c-6a24-47cb-86e6-cf907c53d7d3",
"colab": {}
},
"source": [
"# Features: columns 1-5 (Temperature, Humidity, Light, CO2, HumidityRatio);\n",
"# target: column 6 (Occupancy)\n",
"_X, _y = dataset1.iloc[:,1:6], dataset1.iloc[:,6]\n",
"\n",
"# Standardise every feature to zero mean and unit variance. Light and CO2 are in\n",
"# the hundreds, so on the raw values X*w saturates the sigmoid and the cost\n",
"# evaluates log(0) (\"divide by zero encountered in log\").\n",
"# The learned weights therefore refer to the standardised features.\n",
"_X = (_X - _X.mean()) / _X.std()\n",
"\n",
"_m = np.shape(_X)[0]\n",
"\n",
"# add a column of 1\n",
"_X = np.hstack((np.matrix(np.ones((_m, 1))),_X))\n",
"_y = np.matrix(_y).reshape(_y.shape[0],1)\n",
"\n",
"_alg = 0\n",
"\n",
"_eta = 0.001\n",
"\n",
"_n= np.shape(_X)[1]\n",
"\n",
"_w = np.matrix(np.zeros((_n, 1)))\n",
"\n",
"lr = Logisticr(_X, _y, _alg, _eta, _w)\n",
"lr.fit()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Number of Features: 6\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:31: RuntimeWarning: divide by zero encountered in log\n",
"anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:31: RuntimeWarning: invalid value encountered in multiply\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"Found Solution:\n",
"[[-1.61062034e-01]\n",
" [-3.85343233e+00]\n",
" [-1.71831130e+00]\n",
" [ 2.76715798e-01]\n",
" [ 1.03495925e-01]\n",
" [-3.16666255e-04]]\n",
"Train Accuracy: 0.970732\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "K8QAcsVN5wLG",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment