Skip to content

Instantly share code, notes, and snippets.

@treksis
Created December 8, 2019 20:02
Show Gist options
  • Save treksis/d4d388b2659f2585c7f1345aa35c27cd to your computer and use it in GitHub Desktop.
Save treksis/d4d388b2659f2585c7f1345aa35c27cd to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pylab as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"filename = \"https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/auto.csv\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"headers = [\"symboling\",\"normalized-losses\",\"make\",\"fuel-type\",\"aspiration\", \"num-of-doors\",\"body-style\",\n",
" \"drive-wheels\",\"engine-location\",\"wheel-base\", \"length\",\"width\",\"height\",\"curb-weight\",\"engine-type\",\n",
" \"num-of-cylinders\", \"engine-size\",\"fuel-system\",\"bore\",\"stroke\",\"compression-ratio\",\"horsepower\",\n",
" \"peak-rpm\",\"city-mpg\",\"highway-mpg\",\"price\"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df=pd.read_csv(filename, names = headers)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>symboling</th>\n",
" <th>normalized-losses</th>\n",
" <th>make</th>\n",
" <th>fuel-type</th>\n",
" <th>aspiration</th>\n",
" <th>num-of-doors</th>\n",
" <th>body-style</th>\n",
" <th>drive-wheels</th>\n",
" <th>engine-location</th>\n",
" <th>wheel-base</th>\n",
" <th>...</th>\n",
" <th>engine-size</th>\n",
" <th>fuel-system</th>\n",
" <th>bore</th>\n",
" <th>stroke</th>\n",
" <th>compression-ratio</th>\n",
" <th>horsepower</th>\n",
" <th>peak-rpm</th>\n",
" <th>city-mpg</th>\n",
" <th>highway-mpg</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>?</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>convertible</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>88.6</td>\n",
" <td>...</td>\n",
" <td>130</td>\n",
" <td>mpfi</td>\n",
" <td>3.47</td>\n",
" <td>2.68</td>\n",
" <td>9.0</td>\n",
" <td>111</td>\n",
" <td>5000</td>\n",
" <td>21</td>\n",
" <td>27</td>\n",
" <td>13495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>?</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>convertible</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>88.6</td>\n",
" <td>...</td>\n",
" <td>130</td>\n",
" <td>mpfi</td>\n",
" <td>3.47</td>\n",
" <td>2.68</td>\n",
" <td>9.0</td>\n",
" <td>111</td>\n",
" <td>5000</td>\n",
" <td>21</td>\n",
" <td>27</td>\n",
" <td>16500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>?</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>hatchback</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>94.5</td>\n",
" <td>...</td>\n",
" <td>152</td>\n",
" <td>mpfi</td>\n",
" <td>2.68</td>\n",
" <td>3.47</td>\n",
" <td>9.0</td>\n",
" <td>154</td>\n",
" <td>5000</td>\n",
" <td>19</td>\n",
" <td>26</td>\n",
" <td>16500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>164</td>\n",
" <td>audi</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>four</td>\n",
" <td>sedan</td>\n",
" <td>fwd</td>\n",
" <td>front</td>\n",
" <td>99.8</td>\n",
" <td>...</td>\n",
" <td>109</td>\n",
" <td>mpfi</td>\n",
" <td>3.19</td>\n",
" <td>3.40</td>\n",
" <td>10.0</td>\n",
" <td>102</td>\n",
" <td>5500</td>\n",
" <td>24</td>\n",
" <td>30</td>\n",
" <td>13950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>164</td>\n",
" <td>audi</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>four</td>\n",
" <td>sedan</td>\n",
" <td>4wd</td>\n",
" <td>front</td>\n",
" <td>99.4</td>\n",
" <td>...</td>\n",
" <td>136</td>\n",
" <td>mpfi</td>\n",
" <td>3.19</td>\n",
" <td>3.40</td>\n",
" <td>8.0</td>\n",
" <td>115</td>\n",
" <td>5500</td>\n",
" <td>18</td>\n",
" <td>22</td>\n",
" <td>17450</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 26 columns</p>\n",
"</div>"
],
"text/plain": [
" symboling normalized-losses make fuel-type aspiration num-of-doors \\\n",
"0 3 ? alfa-romero gas std two \n",
"1 3 ? alfa-romero gas std two \n",
"2 1 ? alfa-romero gas std two \n",
"3 2 164 audi gas std four \n",
"4 2 164 audi gas std four \n",
"\n",
" body-style drive-wheels engine-location wheel-base ... engine-size \\\n",
"0 convertible rwd front 88.6 ... 130 \n",
"1 convertible rwd front 88.6 ... 130 \n",
"2 hatchback rwd front 94.5 ... 152 \n",
"3 sedan fwd front 99.8 ... 109 \n",
"4 sedan 4wd front 99.4 ... 136 \n",
"\n",
" fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n",
"0 mpfi 3.47 2.68 9.0 111 5000 21 \n",
"1 mpfi 3.47 2.68 9.0 111 5000 21 \n",
"2 mpfi 2.68 3.47 9.0 154 5000 19 \n",
"3 mpfi 3.19 3.40 10.0 102 5500 24 \n",
"4 mpfi 3.19 3.40 8.0 115 5500 18 \n",
"\n",
" highway-mpg price \n",
"0 27 13495 \n",
"1 27 16500 \n",
"2 26 16500 \n",
"3 30 13950 \n",
"4 22 17450 \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>symboling</th>\n",
" <th>normalized-losses</th>\n",
" <th>make</th>\n",
" <th>fuel-type</th>\n",
" <th>aspiration</th>\n",
" <th>num-of-doors</th>\n",
" <th>body-style</th>\n",
" <th>drive-wheels</th>\n",
" <th>engine-location</th>\n",
" <th>wheel-base</th>\n",
" <th>...</th>\n",
" <th>engine-size</th>\n",
" <th>fuel-system</th>\n",
" <th>bore</th>\n",
" <th>stroke</th>\n",
" <th>compression-ratio</th>\n",
" <th>horsepower</th>\n",
" <th>peak-rpm</th>\n",
" <th>city-mpg</th>\n",
" <th>highway-mpg</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>convertible</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>88.6</td>\n",
" <td>...</td>\n",
" <td>130</td>\n",
" <td>mpfi</td>\n",
" <td>3.47</td>\n",
" <td>2.68</td>\n",
" <td>9.0</td>\n",
" <td>111</td>\n",
" <td>5000</td>\n",
" <td>21</td>\n",
" <td>27</td>\n",
" <td>13495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>convertible</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>88.6</td>\n",
" <td>...</td>\n",
" <td>130</td>\n",
" <td>mpfi</td>\n",
" <td>3.47</td>\n",
" <td>2.68</td>\n",
" <td>9.0</td>\n",
" <td>111</td>\n",
" <td>5000</td>\n",
" <td>21</td>\n",
" <td>27</td>\n",
" <td>16500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>alfa-romero</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>two</td>\n",
" <td>hatchback</td>\n",
" <td>rwd</td>\n",
" <td>front</td>\n",
" <td>94.5</td>\n",
" <td>...</td>\n",
" <td>152</td>\n",
" <td>mpfi</td>\n",
" <td>2.68</td>\n",
" <td>3.47</td>\n",
" <td>9.0</td>\n",
" <td>154</td>\n",
" <td>5000</td>\n",
" <td>19</td>\n",
" <td>26</td>\n",
" <td>16500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>164</td>\n",
" <td>audi</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>four</td>\n",
" <td>sedan</td>\n",
" <td>fwd</td>\n",
" <td>front</td>\n",
" <td>99.8</td>\n",
" <td>...</td>\n",
" <td>109</td>\n",
" <td>mpfi</td>\n",
" <td>3.19</td>\n",
" <td>3.40</td>\n",
" <td>10.0</td>\n",
" <td>102</td>\n",
" <td>5500</td>\n",
" <td>24</td>\n",
" <td>30</td>\n",
" <td>13950</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>164</td>\n",
" <td>audi</td>\n",
" <td>gas</td>\n",
" <td>std</td>\n",
" <td>four</td>\n",
" <td>sedan</td>\n",
" <td>4wd</td>\n",
" <td>front</td>\n",
" <td>99.4</td>\n",
" <td>...</td>\n",
" <td>136</td>\n",
" <td>mpfi</td>\n",
" <td>3.19</td>\n",
" <td>3.40</td>\n",
" <td>8.0</td>\n",
" <td>115</td>\n",
" <td>5500</td>\n",
" <td>18</td>\n",
" <td>22</td>\n",
" <td>17450</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 26 columns</p>\n",
"</div>"
],
"text/plain": [
" symboling normalized-losses make fuel-type aspiration num-of-doors \\\n",
"0 3 NaN alfa-romero gas std two \n",
"1 3 NaN alfa-romero gas std two \n",
"2 1 NaN alfa-romero gas std two \n",
"3 2 164 audi gas std four \n",
"4 2 164 audi gas std four \n",
"\n",
" body-style drive-wheels engine-location wheel-base ... engine-size \\\n",
"0 convertible rwd front 88.6 ... 130 \n",
"1 convertible rwd front 88.6 ... 130 \n",
"2 hatchback rwd front 94.5 ... 152 \n",
"3 sedan fwd front 99.8 ... 109 \n",
"4 sedan 4wd front 99.4 ... 136 \n",
"\n",
" fuel-system bore stroke compression-ratio horsepower peak-rpm city-mpg \\\n",
"0 mpfi 3.47 2.68 9.0 111 5000 21 \n",
"1 mpfi 3.47 2.68 9.0 111 5000 21 \n",
"2 mpfi 2.68 3.47 9.0 154 5000 19 \n",
"3 mpfi 3.19 3.40 10.0 102 5500 24 \n",
"4 mpfi 3.19 3.40 8.0 115 5500 18 \n",
"\n",
" highway-mpg price \n",
"0 27 13495 \n",
"1 27 16500 \n",
"2 26 16500 \n",
"3 30 13950 \n",
"4 22 17450 \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Covert ? into NaN\n",
"\n",
"import numpy as np\n",
"\n",
"df.replace(\"?\", np.nan, inplace = True)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>symboling</th>\n",
" <th>normalized-losses</th>\n",
" <th>make</th>\n",
" <th>fuel-type</th>\n",
" <th>aspiration</th>\n",
" <th>num-of-doors</th>\n",
" <th>body-style</th>\n",
" <th>drive-wheels</th>\n",
" <th>engine-location</th>\n",
" <th>wheel-base</th>\n",
" <th>...</th>\n",
" <th>engine-size</th>\n",
" <th>fuel-system</th>\n",
" <th>bore</th>\n",
" <th>stroke</th>\n",
" <th>compression-ratio</th>\n",
" <th>horsepower</th>\n",
" <th>peak-rpm</th>\n",
" <th>city-mpg</th>\n",
" <th>highway-mpg</th>\n",
" <th>price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 26 columns</p>\n",
"</div>"
],
"text/plain": [
" symboling normalized-losses make fuel-type aspiration num-of-doors \\\n",
"0 False True False False False False \n",
"1 False True False False False False \n",
"2 False True False False False False \n",
"3 False False False False False False \n",
"4 False False False False False False \n",
"\n",
" body-style drive-wheels engine-location wheel-base ... engine-size \\\n",
"0 False False False False ... False \n",
"1 False False False False ... False \n",
"2 False False False False ... False \n",
"3 False False False False ... False \n",
"4 False False False False ... False \n",
"\n",
" fuel-system bore stroke compression-ratio horsepower peak-rpm \\\n",
"0 False False False False False False \n",
"1 False False False False False False \n",
"2 False False False False False False \n",
"3 False False False False False False \n",
"4 False False False False False False \n",
"\n",
" city-mpg highway-mpg price \n",
"0 False False False \n",
"1 False False False \n",
"2 False False False \n",
"3 False False False \n",
"4 False False False \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"missing_data=df.isnull()\n",
"missing_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"symboling\n",
"False 205\n",
"Name: symboling, dtype: int64\n",
"\n",
"normalized-losses\n",
"False 164\n",
"True 41\n",
"Name: normalized-losses, dtype: int64\n",
"\n",
"make\n",
"False 205\n",
"Name: make, dtype: int64\n",
"\n",
"fuel-type\n",
"False 205\n",
"Name: fuel-type, dtype: int64\n",
"\n",
"aspiration\n",
"False 205\n",
"Name: aspiration, dtype: int64\n",
"\n",
"num-of-doors\n",
"False 203\n",
"True 2\n",
"Name: num-of-doors, dtype: int64\n",
"\n",
"body-style\n",
"False 205\n",
"Name: body-style, dtype: int64\n",
"\n",
"drive-wheels\n",
"False 205\n",
"Name: drive-wheels, dtype: int64\n",
"\n",
"engine-location\n",
"False 205\n",
"Name: engine-location, dtype: int64\n",
"\n",
"wheel-base\n",
"False 205\n",
"Name: wheel-base, dtype: int64\n",
"\n",
"length\n",
"False 205\n",
"Name: length, dtype: int64\n",
"\n",
"width\n",
"False 205\n",
"Name: width, dtype: int64\n",
"\n",
"height\n",
"False 205\n",
"Name: height, dtype: int64\n",
"\n",
"curb-weight\n",
"False 205\n",
"Name: curb-weight, dtype: int64\n",
"\n",
"engine-type\n",
"False 205\n",
"Name: engine-type, dtype: int64\n",
"\n",
"num-of-cylinders\n",
"False 205\n",
"Name: num-of-cylinders, dtype: int64\n",
"\n",
"engine-size\n",
"False 205\n",
"Name: engine-size, dtype: int64\n",
"\n",
"fuel-system\n",
"False 205\n",
"Name: fuel-system, dtype: int64\n",
"\n",
"bore\n",
"False 201\n",
"True 4\n",
"Name: bore, dtype: int64\n",
"\n",
"stroke\n",
"False 201\n",
"True 4\n",
"Name: stroke, dtype: int64\n",
"\n",
"compression-ratio\n",
"False 205\n",
"Name: compression-ratio, dtype: int64\n",
"\n",
"horsepower\n",
"False 203\n",
"True 2\n",
"Name: horsepower, dtype: int64\n",
"\n",
"peak-rpm\n",
"False 203\n",
"True 2\n",
"Name: peak-rpm, dtype: int64\n",
"\n",
"city-mpg\n",
"False 205\n",
"Name: city-mpg, dtype: int64\n",
"\n",
"highway-mpg\n",
"False 205\n",
"Name: highway-mpg, dtype: int64\n",
"\n",
"price\n",
"False 201\n",
"True 4\n",
"Name: price, dtype: int64\n",
"\n"
]
}
],
"source": [
"for column in missing_data.columns.values.tolist():\n",
" print(column)\n",
" print(missing_data[column].value_counts())\n",
" print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average of normalized-losses: 122.0\n"
]
}
],
"source": [
"avg_norm_loss = df[\"normalized-losses\"].astype(\"float\").mean(axis=0)\n",
"print(\"Average of normalized-losses:\", avg_norm_loss)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df[\"normalized-losses\"].replace(np.nan, avg_norm_loss, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average of bore: 3.3297512437810943\n"
]
}
],
"source": [
"avg_bore=df['bore'].astype(\"float\").mean(axis=0)\n",
"print(\"Average of bore:\", avg_bore)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"df[\"bore\"].replace(np.nan, avg_bore, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Average of stroke: 3.255422885572139\n"
]
}
],
"source": [
"#q1\n",
"\n",
"avg_stroke=df['stroke'].astype(\"float\").mean(axis=0)\n",
"print(\"Average of stroke:\", avg_stroke)\n",
"\n",
"df[\"stroke\"].replace(np.nan, avg_stroke, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"average horsepower: 104.25615763546799\n"
]
}
],
"source": [
"avg_horsepower=df['horsepower'].astype(\"float\").mean(axis=0)\n",
"print(\"average horsepower:\", avg_horsepower)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"df['horsepower'].replace(np.nan, avg_horsepower, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"average peak rpm: 5125.369458128079\n"
]
}
],
"source": [
"avg_peakrpm=df['peak-rpm'].astype('float').mean(axis=0)\n",
"print(\"average peak rpm:\", avg_peakrpm)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"df['peak-rpm'].replace(np.nan, avg_peakrpm, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"four 114\n",
"two 89\n",
"Name: num-of-doors, dtype: int64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['num-of-doors'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'four'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['num-of-doors'].value_counts().idxmax()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"df['num-of-doors'].replace(np.nan, \"four\", inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment