Skip to content

Instantly share code, notes, and snippets.

@Jargon4072
Last active June 24, 2020 06:45
Show Gist options
  • Save Jargon4072/29c976793f5a3b1fd50fff1595d38743 to your computer and use it in GitHub Desktop.
Save Jargon4072/29c976793f5a3b1fd50fff1595d38743 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Linear regression Implementation from scratch\n",
"\n",
"The data was obtained from https://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt\n",
"A CSV version of the data can also be found here: https://drive.google.com/file/d/1KCgwg7ZEHDhyhRB7SaTECCI8ybq82Z52/view\n",
"\n",
"This implementation focuses on learning and understanding the cost function and gradient descent for linear regression.\n",
"\n",
"More tuning and improvements can be done. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Index</th>\n",
" <th>Brain Weight</th>\n",
" <th>Body Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>3.385</td>\n",
" <td>44.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.480</td>\n",
" <td>15.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1.350</td>\n",
" <td>8.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>465.000</td>\n",
" <td>423.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>36.330</td>\n",
" <td>119.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Index Brain Weight Body Weight\n",
"0 1 3.385 44.5\n",
"1 2 0.480 15.5\n",
"2 3 1.350 8.1\n",
"3 4 465.000 423.0\n",
"4 5 36.330 119.5"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
" \n",
"# Load the dataset from the local CSV copy and preview the first rows.\n",
"data = pd.read_csv(\"./x01.csv\")\n",
"data.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Brain Weight</th>\n",
" <th>Body Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.385</td>\n",
" <td>44.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.480</td>\n",
" <td>15.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.350</td>\n",
" <td>8.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>465.000</td>\n",
" <td>423.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>36.330</td>\n",
" <td>119.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Brain Weight Body Weight\n",
"0 3.385 44.5\n",
"1 0.480 15.5\n",
"2 1.350 8.1\n",
"3 465.000 423.0\n",
"4 36.330 119.5"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The \"Index\" column is only a row counter. errors=\"ignore\" keeps this cell\n",
"# safe to re-run after the column has already been removed.\n",
"data = data.drop(columns=\"Index\", errors=\"ignore\")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Brain_weight</th>\n",
" <th>Body_weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.385</td>\n",
" <td>44.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.480</td>\n",
" <td>15.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.350</td>\n",
" <td>8.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>465.000</td>\n",
" <td>423.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>36.330</td>\n",
" <td>119.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Brain_weight Body_weight\n",
"0 3.385 44.5\n",
"1 0.480 15.5\n",
"2 1.350 8.1\n",
"3 465.000 423.0\n",
"4 36.330 119.5"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assign identifier-friendly column names by position.\n",
"data.columns = [\"Brain_weight\", \"Body_weight\"]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Feature column (X) and target column (y) as separate Series.\n",
"X, y = data[\"Brain_weight\"], data[\"Body_weight\"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"\n",
"from sklearn.model_selection import train_test_split \n",
"# Hold out one fifth of the rows; the fixed random_state keeps the split repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=0\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Import only what the notebook uses; a wildcard import would shadow\n",
"# builtins such as sum, any and all with their NumPy counterparts.\n",
"from numpy import array"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Brain_weight</th>\n",
" <th>Body_weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>62.00</td>\n",
" <td>1320.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>55.50</td>\n",
" <td>175.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>35.00</td>\n",
" <td>56.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>52.16</td>\n",
" <td>440.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>0.28</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Brain_weight Body_weight\n",
"30 62.00 1320.0\n",
"41 55.50 175.0\n",
"33 35.00 56.0\n",
"43 52.16 440.0\n",
"49 0.28 1.9"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Recombine the training split into one frame: x in column 0, y in column 1.\n",
"dfpoint = pd.DataFrame(\n",
"    {\n",
"        'Brain_weight': X_train,\n",
"        'Body_weight': y_train,\n",
"    }\n",
")\n",
"dfpoint.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# y = mx + b\n",
"# m is slope, b is y-intercept\n",
"def compute_error_for_line_given_points(b, m, points):\n",
"    \"\"\"Return the mean squared error of the line y = m*x + b over points.\n",
"\n",
"    points is read as points[i, 0] (x) and points[i, 1] (y), so it must\n",
"    support 2-D positional indexing (for example a NumPy array).\n",
"    \"\"\"\n",
"    squared_error_sum = 0\n",
"    for row in range(len(points)):\n",
"        observed_x = points[row, 0]\n",
"        observed_y = points[row, 1]\n",
"        predicted_y = m * observed_x + b\n",
"        squared_error_sum += (observed_y - predicted_y) ** 2\n",
"    return squared_error_sum / float(len(points))\n",
"\n",
"def step_gradient(b_current, m_current, points, learningRate):\n",
"    \"\"\"Take one gradient-descent step on the mean squared error.\n",
"\n",
"    Accumulates the partial derivatives of the MSE with respect to the\n",
"    intercept and the slope over every row of points (x in column 0, y in\n",
"    column 1), then moves both parameters against their gradient, scaled\n",
"    by learningRate. Returns the updated pair as [new_b, new_m].\n",
"    \"\"\"\n",
"    grad_b = 0\n",
"    grad_m = 0\n",
"    N = float(len(points))\n",
"    for row in range(len(points)):\n",
"        x = points[row, 0]\n",
"        # Signed prediction error for this row, shared by both partials.\n",
"        residual = points[row, 1] - ((m_current * x) + b_current)\n",
"        grad_b += -(2/N) * residual\n",
"        grad_m += -(2/N) * x * residual\n",
"    return [\n",
"        b_current - (learningRate * grad_b),\n",
"        m_current - (learningRate * grad_m),\n",
"    ]\n",
"\n",
"def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):\n",
"    \"\"\"Run num_iterations gradient-descent steps and return [b, m].\n",
"\n",
"    points may be anything array() can convert (for example a DataFrame with\n",
"    x in the first column and y in the second). Starting from starting_b and\n",
"    starting_m, each iteration applies step_gradient with learning_rate.\n",
"    \"\"\"\n",
"    # The conversion is loop-invariant: do it once instead of on every step.\n",
"    point_array = array(points)\n",
"    b = starting_b\n",
"    m = starting_m\n",
"    for _ in range(num_iterations):\n",
"        b, m = step_gradient(b, m, point_array, learning_rate)\n",
"    return [b, m]\n",
"\n",
"def run_lr(res, points=None, learning_rate=0.000001, initial_b=0, initial_m=0, num_iterations=1000):\n",
"    \"\"\"Fit y = m*x + b by gradient descent and append b, then m, to res.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    res : list\n",
"        Output list; the fitted intercept and slope are appended to it.\n",
"    points : array-like, optional\n",
"        Training data with x in column 0 and y in column 1. Defaults to the\n",
"        notebook-level dfpoint frame.\n",
"    learning_rate : float\n",
"        Step size passed to gradient_descent_runner.\n",
"    initial_b, initial_m : number\n",
"        Initial guesses for the y-intercept and the slope.\n",
"    num_iterations : int\n",
"        Number of gradient-descent steps to run.\n",
"    \"\"\"\n",
"    if points is None:\n",
"        points = dfpoint\n",
"    # Convert once; the error function indexes rows and columns positionally.\n",
"    point_array = array(points)\n",
"    start_error = compute_error_for_line_given_points(initial_b, initial_m, point_array)\n",
"    print (\"Starting gradient descent at b = {0}, m = {1}, error = {2}\".format(initial_b, initial_m, start_error))\n",
"    print (\"Running...\")\n",
"    [b, m] = gradient_descent_runner(point_array, initial_b, initial_m, learning_rate, num_iterations)\n",
"    final_error = compute_error_for_line_given_points(b, m, point_array)\n",
"    print (\"After {0} iterations b = {1}, m = {2}, error = {3}\".format(num_iterations, b, m, final_error))\n",
"    print(b)\n",
"    print(m)\n",
"    res.append(b)\n",
"    res.append(m)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting gradient descent at b = 0, m = 0, error = 69741.70002083335\n",
"Running...\n",
"After 1000 iterations b = 0.1037659425421265, m = 1.4380982734212167, error = 38857.61335587545\n",
"0.1037659425421265\n",
"1.4380982734212167\n",
"[0.1037659425421265, 1.4380982734212167]\n"
]
}
],
"source": [
"# run_lr appends the fitted intercept and then the slope to this list.\n",
"res = []\n",
"run_lr(res)\n",
"print(res)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# NumPy array of the held-out x values, read by model_output below.\n",
"test = array(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"529.0 760.8577525823658\n",
"0.12 0.2763377353526725\n",
"4.235 6.194112130480979\n",
"85.0 122.34211918334555\n",
"0.92 1.426816354089646\n",
"1.35 2.045198611660769\n",
"4.05 5.928063949898053\n",
"0.10400000000000001 0.25332816297793304\n",
"2.5 3.699011626095168\n",
"10.0 14.484748676754293\n",
"36.33 52.34987621593493\n",
"0.10099999999999999 0.24901386815766935\n",
"[760.8577525823658, 0.2763377353526725, 6.194112130480979, 122.34211918334555, 1.426816354089646, 2.045198611660769, 5.928063949898053, 0.25332816297793304, 3.699011626095168, 14.484748676754293, 52.34987621593493, 0.24901386815766935]\n"
]
}
],
"source": [
"def model_output(hx):\n",
"    \"\"\"Print each test input next to its prediction and collect the predictions.\n",
"\n",
"    Reads the notebook-level test values and the fitted parameters in res\n",
"    (res[0] is the intercept, res[1] the slope); every prediction is\n",
"    appended to hx.\n",
"    \"\"\"\n",
"    for xa in test:\n",
"        prediction = res[1]*xa+res[0]\n",
"        print(xa, prediction)\n",
"        hx.append(prediction)\n",
"\n",
"# Collect the predictions for the held-out inputs.\n",
"hx = []\n",
"model_output(hx)\n",
"print(hx)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"# Held-out points (red) against the fitted line (blue).\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(X_test, y_test, color='red')\n",
"ax.plot(X_test, hx, color='blue')\n",
"# Show the parameters that were actually fitted (res holds [b, m]).\n",
"ax.set_title('hθ(x) vs x for θ₀ = {0:.4f} and θ₁ = {1:.4f}'.format(res[0], res[1]))\n",
"ax.set_xlabel('x')\n",
"ax.set_ylabel('hθ(x)')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment