Skip to content

Instantly share code, notes, and snippets.

@aegorenkov
Created April 11, 2016 19:44
Show Gist options
  • Save aegorenkov/6a78fd0074664318a32b13c7ed0bc781 to your computer and use it in GitHub Desktop.
Save aegorenkov/6a78fd0074664318a32b13c7ed0bc781 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ri</th>\n",
" <th>na</th>\n",
" <th>mg</th>\n",
" <th>al</th>\n",
" <th>si</th>\n",
" <th>k</th>\n",
" <th>ca</th>\n",
" <th>ba</th>\n",
" <th>fe</th>\n",
" <th>glass_type</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>1.51966</td>\n",
" <td>14.77</td>\n",
" <td>3.75</td>\n",
" <td>0.29</td>\n",
" <td>72.02</td>\n",
" <td>0.03</td>\n",
" <td>9.00</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>185</th>\n",
" <td>1.51115</td>\n",
" <td>17.38</td>\n",
" <td>0.00</td>\n",
" <td>0.34</td>\n",
" <td>75.41</td>\n",
" <td>0.00</td>\n",
" <td>6.65</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>1.52213</td>\n",
" <td>14.21</td>\n",
" <td>3.82</td>\n",
" <td>0.47</td>\n",
" <td>71.77</td>\n",
" <td>0.11</td>\n",
" <td>9.57</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>1.52213</td>\n",
" <td>14.21</td>\n",
" <td>3.82</td>\n",
" <td>0.47</td>\n",
" <td>71.77</td>\n",
" <td>0.11</td>\n",
" <td>9.57</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>1.52320</td>\n",
" <td>13.72</td>\n",
" <td>3.72</td>\n",
" <td>0.51</td>\n",
" <td>71.75</td>\n",
" <td>0.09</td>\n",
" <td>10.06</td>\n",
" <td>0</td>\n",
" <td>0.16</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ri na mg al si k ca ba fe glass_type\n",
"id \n",
"22 1.51966 14.77 3.75 0.29 72.02 0.03 9.00 0 0.00 1\n",
"185 1.51115 17.38 0.00 0.34 75.41 0.00 6.65 0 0.00 6\n",
"40 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n",
"39 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n",
"51 1.52320 13.72 3.72 0.51 71.75 0.09 10.06 0 0.16 1"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Load the UCI glass data set; column names are supplied explicitly via `names`\n",
"# and the 'id' column becomes the index.\n",
"url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data'\n",
"col_names = ['id','ri','na','mg','al','si','k','ca','ba','fe','glass_type']\n",
"# Sort rows by the 'al' column by rebinding the sorted result instead of using\n",
"# inplace=True, so re-running this cell always rebuilds `glass` from the raw file.\n",
"glass = pd.read_csv(url, names=col_names, index_col='id').sort_values('al')\n",
"glass.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Derive the binary target: glass types 1-3 become 0 and types 5-7 become 1.\n",
"# Any glass_type missing from the mapping comes out of .map() as NaN.\n",
"household_by_type = {1: 0, 2: 0, 3: 0, 5: 1, 6: 1, 7: 1}\n",
"glass['household'] = glass['glass_type'].map(household_by_type)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Take an independent copy: plain assignment would only alias `glass`, so any later\n",
"# in-place change to one frame would silently alter the other.\n",
"glass_normal = glass.copy()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Oversample class 1: append a second copy of every household == 1 row to the data.\n",
"is_household = glass_normal['household'] == 1\n",
"glass_doubled = pd.concat([glass_normal, glass_normal[is_household]], axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 163\n",
"1 51\n",
"Name: household, dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class balance of the original data.\n",
"glass_normal['household'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 163\n",
"1 102\n",
"Name: household, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class balance after duplicating the household == 1 rows.\n",
"glass_doubled['household'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 4.18040386]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Baseline: unweighted logistic regression on the original data.\n",
"# C is the inverse regularization strength, so C=1e9 leaves the fit almost unpenalized.\n",
"\n",
"feature_cols = ['al']\n",
"X, y = glass_normal[feature_cols], glass_normal['household']\n",
"logreg = LogisticRegression(C=1e9).fit(X, y)\n",
"logreg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3.85191349]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weighted logistic regression on the original data: class 1 carries twice the weight of class 0.\n",
"# C is the inverse regularization strength, so C=1e9 leaves the fit almost unpenalized.\n",
"\n",
"feature_cols = ['al']\n",
"X, y = glass_normal[feature_cols], glass_normal['household']\n",
"logreg = LogisticRegression(C=1e9, class_weight={0: 1, 1: 2}).fit(X, y)\n",
"logreg.coef_"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3.85191349]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Unweighted logistic regression on the oversampled data (household == 1 rows duplicated).\n",
"# C is the inverse regularization strength, so C=1e9 leaves the fit almost unpenalized.\n",
"\n",
"feature_cols = ['al']\n",
"X, y = glass_doubled[feature_cols], glass_doubled['household']\n",
"logreg = LogisticRegression(C=1e9).fit(X, y)\n",
"logreg.coef_"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment