Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save pushkarsaini18/45ad3a3fd47092043c2911ae4a56c601 to your computer and use it in GitHub Desktop.
Save pushkarsaini18/45ad3a3fd47092043c2911ae4a56c601 to your computer and use it in GitHub Desktop.
data clean
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"id": "7003089f",
"cell_type": "code",
"source": "import pandas as pd \nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns \nfrom scipy import stats",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "f0f1cbb7",
"cell_type": "code",
"source": "data=pd.read_csv(\"data_clean.csv\")",
"execution_count": 105,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "6822f668",
"cell_type": "code",
"source": "data",
"execution_count": 93,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 93,
"data": {
"text/plain": " Unnamed: 0 Ozone Solar.R Wind Temp C Month Day Year Temp Weather\n0 1 41.0 190.0 7.4 67 5 1 2010 67 S\n1 2 36.0 118.0 8.0 72 5 2 2010 72 C\n2 3 12.0 149.0 12.6 74 5 3 2010 74 PS\n3 4 18.0 313.0 11.5 62 5 4 2010 62 S\n4 5 NaN NaN 14.3 56 5 5 2010 56 S\n.. ... ... ... ... ... ... ... ... ... ...\n153 154 41.0 190.0 7.4 67 5 1 2010 67 C\n154 155 30.0 193.0 6.9 70 9 26 2010 70 PS\n155 156 NaN 145.0 13.2 77 9 27 2010 77 S\n156 157 14.0 191.0 14.3 75 9 28 2010 75 S\n157 158 18.0 131.0 8.0 76 9 29 2010 76 C\n\n[158 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>36.0</td>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>12.0</td>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>18.0</td>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>154</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>155</td>\n <td>30.0</td>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>156</td>\n <td>NaN</td>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>157</td>\n <td>14.0</td>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>158</td>\n <td>18.0</td>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 10 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": false
},
"id": "7f782003",
"cell_type": "code",
"source": "data",
"execution_count": 4,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>36.0</td>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>12.0</td>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>18.0</td>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>154</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>155</td>\n <td>30.0</td>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>156</td>\n <td>NaN</td>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>157</td>\n <td>14.0</td>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>158</td>\n <td>18.0</td>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 10 columns</p>\n</div>",
"text/plain": " Unnamed: 0 Ozone Solar.R Wind Temp C Month Day Year Temp Weather\n0 1 41.0 190.0 7.4 67 5 1 2010 67 S\n1 2 36.0 118.0 8.0 72 5 2 2010 72 C\n2 3 12.0 149.0 12.6 74 5 3 2010 74 PS\n3 4 18.0 313.0 11.5 62 5 4 2010 62 S\n4 5 NaN NaN 14.3 56 5 5 2010 56 S\n.. ... ... ... ... ... ... ... ... ... ...\n153 154 41.0 190.0 7.4 67 5 1 2010 67 C\n154 155 30.0 193.0 6.9 70 9 26 2010 70 PS\n155 156 NaN 145.0 13.2 77 9 27 2010 77 S\n156 157 14.0 191.0 14.3 75 9 28 2010 75 S\n157 158 18.0 131.0 8.0 76 9 29 2010 76 C\n\n[158 rows x 10 columns]"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"id": "ab8c1d93",
"cell_type": "code",
"source": "data['Month']=pd.to_numeric(data['Month'])",
"execution_count": 53,
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "Unable to parse string \"May\" at position 23",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mpandas\\_libs\\lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: Unable to parse string \"May\"",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-53-24977ed8031b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Month'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Month'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m~\\anaconda\\lib\\site-packages\\pandas\\core\\tools\\numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[1;34m(arg, errors, downcast)\u001b[0m\n\u001b[0;32m 152\u001b[0m \u001b[0mcoerce_numeric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"raise\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 153\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 154\u001b[1;33m values = lib.maybe_convert_numeric(\n\u001b[0m\u001b[0;32m 155\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcoerce_numeric\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcoerce_numeric\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 156\u001b[0m )\n",
"\u001b[1;32mpandas\\_libs\\lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: Unable to parse string \"May\" at position 23"
]
}
]
},
{
"metadata": {
"trusted": true
},
"id": "68a8a821",
"cell_type": "code",
"source": "data['Month'].values",
"execution_count": 94,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 94,
"data": {
"text/plain": "array(['5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',\n '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', 'May', '5', '5',\n '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6', '6', '6',\n '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6',\n '6', '6', '6', '6', '6', '6', '6', '6', '6', '7', '7', '7', '7',\n '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7',\n '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7',\n '7', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8',\n '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8',\n '8', '8', '8', '8', '8', '8', '9', '9', '9', '9', '9', '9', '9',\n '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9',\n '9', '9', '9', '9', '9', '9', '9', '9', '9', '9', '5', '9', '9',\n '9', '9'], dtype=object)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "76f8904e",
"cell_type": "code",
"source": "data['Month']=pd.to_numeric(data['Month'],errors='coerce')",
"execution_count": 107,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "1fd0cd80",
"cell_type": "code",
"source": "data['Month']\n",
"execution_count": 108,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 108,
"data": {
"text/plain": "0 5.0\n1 5.0\n2 5.0\n3 5.0\n4 5.0\n ... \n153 5.0\n154 9.0\n155 9.0\n156 9.0\n157 9.0\nName: Month, Length: 158, dtype: float64"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": false
},
"id": "2da1cf9b",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "c34e5dd5",
"cell_type": "code",
"source": "data['Weather']=data['Weather'].astype('category')",
"execution_count": 109,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "f2178be6",
"cell_type": "code",
"source": "data['Weather'].values",
"execution_count": 57,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 57,
"data": {
"text/plain": "['S', 'C', 'PS', 'S', 'S', ..., 'C', 'PS', 'S', 'S', 'C']\nLength: 158\nCategories (3, object): ['C', 'PS', 'S']"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "32629d78",
"cell_type": "code",
"source": "data.describe()",
"execution_count": 58,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 58,
"data": {
"text/plain": " Unnamed: 0 Ozone Solar.R Wind Month Day \\\ncount 158.000000 120.000000 151.000000 158.000000 157.000000 158.000000 \nmean 79.500000 41.583333 185.403974 9.957595 7.044586 16.006329 \nstd 45.754781 32.620709 88.723103 3.511261 1.433768 8.997166 \nmin 1.000000 1.000000 7.000000 1.700000 5.000000 1.000000 \n25% 40.250000 18.000000 119.000000 7.400000 6.000000 8.000000 \n50% 79.500000 30.500000 197.000000 9.700000 7.000000 16.000000 \n75% 118.750000 61.500000 257.000000 11.875000 8.000000 24.000000 \nmax 158.000000 168.000000 334.000000 20.700000 9.000000 31.000000 \n\n Year Temp \ncount 158.0 158.000000 \nmean 2010.0 77.727848 \nstd 0.0 9.377877 \nmin 2010.0 56.000000 \n25% 2010.0 72.000000 \n50% 2010.0 78.500000 \n75% 2010.0 84.000000 \nmax 2010.0 97.000000 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>count</th>\n <td>158.000000</td>\n <td>120.000000</td>\n <td>151.000000</td>\n <td>158.000000</td>\n <td>157.000000</td>\n <td>158.000000</td>\n <td>158.0</td>\n <td>158.000000</td>\n </tr>\n <tr>\n <th>mean</th>\n <td>79.500000</td>\n <td>41.583333</td>\n <td>185.403974</td>\n <td>9.957595</td>\n <td>7.044586</td>\n <td>16.006329</td>\n <td>2010.0</td>\n <td>77.727848</td>\n </tr>\n <tr>\n <th>std</th>\n <td>45.754781</td>\n <td>32.620709</td>\n <td>88.723103</td>\n <td>3.511261</td>\n <td>1.433768</td>\n <td>8.997166</td>\n <td>0.0</td>\n <td>9.377877</td>\n </tr>\n <tr>\n <th>min</th>\n <td>1.000000</td>\n <td>1.000000</td>\n <td>7.000000</td>\n <td>1.700000</td>\n <td>5.000000</td>\n <td>1.000000</td>\n <td>2010.0</td>\n <td>56.000000</td>\n </tr>\n <tr>\n <th>25%</th>\n <td>40.250000</td>\n <td>18.000000</td>\n <td>119.000000</td>\n <td>7.400000</td>\n <td>6.000000</td>\n <td>8.000000</td>\n <td>2010.0</td>\n <td>72.000000</td>\n </tr>\n <tr>\n <th>50%</th>\n <td>79.500000</td>\n <td>30.500000</td>\n <td>197.000000</td>\n <td>9.700000</td>\n <td>7.000000</td>\n <td>16.000000</td>\n <td>2010.0</td>\n <td>78.500000</td>\n </tr>\n <tr>\n <th>75%</th>\n <td>118.750000</td>\n <td>61.500000</td>\n <td>257.000000</td>\n <td>11.875000</td>\n <td>8.000000</td>\n <td>24.000000</td>\n <td>2010.0</td>\n <td>84.000000</td>\n </tr>\n <tr>\n <th>max</th>\n <td>158.000000</td>\n <td>168.000000</td>\n <td>334.000000</td>\n <td>20.700000</td>\n <td>9.000000</td>\n <td>31.000000</td>\n <td>2010.0</td>\n <td>97.000000</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "bee621fa",
"cell_type": "code",
"source": "data",
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 15,
"data": {
"text/plain": " Unnamed: 0 Ozone Solar.R Wind Temp C Month Day Year Temp Weather\n0 1 41.0 190.0 7.4 67 5.0 1 2010 67 S\n1 2 36.0 118.0 8.0 72 5.0 2 2010 72 C\n2 3 12.0 149.0 12.6 74 5.0 3 2010 74 PS\n3 4 18.0 313.0 11.5 62 5.0 4 2010 62 S\n4 5 NaN NaN 14.3 56 5.0 5 2010 56 S\n.. ... ... ... ... ... ... ... ... ... ...\n153 154 41.0 190.0 7.4 67 5.0 1 2010 67 C\n154 155 30.0 193.0 6.9 70 9.0 26 2010 70 PS\n155 156 NaN 145.0 13.2 77 9.0 27 2010 77 S\n156 157 14.0 191.0 14.3 75 9.0 28 2010 75 S\n157 158 18.0 131.0 8.0 76 9.0 29 2010 76 C\n\n[158 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>36.0</td>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5.0</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>12.0</td>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5.0</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>18.0</td>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5.0</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>154</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>155</td>\n <td>30.0</td>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9.0</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>156</td>\n <td>NaN</td>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9.0</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>157</td>\n <td>14.0</td>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9.0</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>158</td>\n <td>18.0</td>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9.0</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 10 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "a67059e0",
"cell_type": "code",
"source": "data[data.duplicated()].shape",
"execution_count": 110,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 110,
"data": {
"text/plain": "(0, 10)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "72d852f4",
"cell_type": "code",
"source": "data.head(20)",
"execution_count": 111,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 111,
"data": {
"text/plain": " Unnamed: 0 Ozone Solar.R Wind Temp C Month Day Year Temp Weather\n0 1 41.0 190.0 7.4 67 5.0 1 2010 67 S\n1 2 36.0 118.0 8.0 72 5.0 2 2010 72 C\n2 3 12.0 149.0 12.6 74 5.0 3 2010 74 PS\n3 4 18.0 313.0 11.5 62 5.0 4 2010 62 S\n4 5 NaN NaN 14.3 56 5.0 5 2010 56 S\n5 6 28.0 NaN 14.9 66 5.0 6 2010 66 C\n6 7 23.0 299.0 8.6 65 5.0 7 2010 65 PS\n7 8 19.0 99.0 13.8 59 5.0 8 2010 59 C\n8 9 8.0 19.0 20.1 61 5.0 9 2010 61 PS\n9 10 NaN 194.0 8.6 69 5.0 10 2010 69 S\n10 11 7.0 NaN 6.9 C 5.0 11 2010 74 C\n11 12 16.0 256.0 9.7 69 5.0 12 2010 69 PS\n12 13 11.0 290.0 9.2 66 5.0 13 2010 66 S\n13 14 14.0 274.0 10.9 68 5.0 14 2010 68 S\n14 15 18.0 65.0 13.2 58 5.0 15 2010 58 C\n15 16 14.0 334.0 11.5 64 5.0 16 2010 64 S\n16 17 34.0 307.0 12.0 66 5.0 17 2010 66 S\n17 18 6.0 78.0 18.4 57 5.0 18 2010 57 C\n18 19 30.0 322.0 11.5 68 5.0 19 2010 68 PS\n19 20 11.0 44.0 9.7 62 5.0 20 2010 62 S",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>36.0</td>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5.0</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>12.0</td>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5.0</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>4</td>\n <td>18.0</td>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5.0</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>5</th>\n <td>6</td>\n <td>28.0</td>\n <td>NaN</td>\n <td>14.9</td>\n <td>66</td>\n <td>5.0</td>\n <td>6</td>\n <td>2010</td>\n <td>66</td>\n <td>C</td>\n </tr>\n <tr>\n <th>6</th>\n <td>7</td>\n <td>23.0</td>\n <td>299.0</td>\n <td>8.6</td>\n <td>65</td>\n <td>5.0</td>\n <td>7</td>\n <td>2010</td>\n <td>65</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>7</th>\n <td>8</td>\n <td>19.0</td>\n <td>99.0</td>\n <td>13.8</td>\n <td>59</td>\n <td>5.0</td>\n <td>8</td>\n <td>2010</td>\n <td>59</td>\n <td>C</td>\n </tr>\n <tr>\n <th>8</th>\n <td>9</td>\n <td>8.0</td>\n <td>19.0</td>\n <td>20.1</td>\n <td>61</td>\n <td>5.0</td>\n <td>9</td>\n <td>2010</td>\n <td>61</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>9</th>\n <td>10</td>\n <td>NaN</td>\n <td>194.0</td>\n <td>8.6</td>\n <td>69</td>\n <td>5.0</td>\n <td>10</td>\n <td>2010</td>\n <td>69</td>\n <td>S</td>\n </tr>\n <tr>\n <th>10</th>\n <td>11</td>\n <td>7.0</td>\n <td>NaN</td>\n <td>6.9</td>\n <td>C</td>\n <td>5.0</td>\n <td>11</td>\n <td>2010</td>\n <td>74</td>\n <td>C</td>\n </tr>\n <tr>\n <th>11</th>\n <td>12</td>\n <td>16.0</td>\n <td>256.0</td>\n <td>9.7</td>\n <td>69</td>\n <td>5.0</td>\n <td>12</td>\n <td>2010</td>\n <td>69</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>12</th>\n <td>13</td>\n <td>11.0</td>\n <td>290.0</td>\n <td>9.2</td>\n <td>66</td>\n <td>5.0</td>\n <td>13</td>\n <td>2010</td>\n <td>66</td>\n <td>S</td>\n </tr>\n <tr>\n <th>13</th>\n <td>14</td>\n <td>14.0</td>\n <td>274.0</td>\n <td>10.9</td>\n <td>68</td>\n <td>5.0</td>\n <td>14</td>\n <td>2010</td>\n <td>68</td>\n <td>S</td>\n </tr>\n <tr>\n <th>14</th>\n <td>15</td>\n <td>18.0</td>\n <td>65.0</td>\n <td>13.2</td>\n <td>58</td>\n <td>5.0</td>\n <td>15</td>\n <td>2010</td>\n <td>58</td>\n <td>C</td>\n </tr>\n <tr>\n <th>15</th>\n <td>16</td>\n <td>14.0</td>\n <td>334.0</td>\n <td>11.5</td>\n <td>64</td>\n <td>5.0</td>\n <td>16</td>\n <td>2010</td>\n <td>64</td>\n <td>S</td>\n </tr>\n <tr>\n <th>16</th>\n <td>17</td>\n <td>34.0</td>\n <td>307.0</td>\n <td>12.0</td>\n <td>66</td>\n <td>5.0</td>\n <td>17</td>\n <td>2010</td>\n <td>66</td>\n <td>S</td>\n </tr>\n <tr>\n <th>17</th>\n <td>18</td>\n <td>6.0</td>\n <td>78.0</td>\n <td>18.4</td>\n <td>57</td>\n <td>5.0</td>\n <td>18</td>\n <td>2010</td>\n <td>57</td>\n <td>C</td>\n </tr>\n <tr>\n <th>18</th>\n <td>19</td>\n <td>30.0</td>\n <td>322.0</td>\n <td>11.5</td>\n <td>68</td>\n <td>5.0</td>\n <td>19</td>\n <td>2010</td>\n <td>68</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>19</th>\n <td>20</td>\n <td>11.0</td>\n <td>44.0</td>\n <td>9.7</td>\n <td>62</td>\n <td>5.0</td>\n <td>20</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "7c27df5e",
"cell_type": "code",
"source": "data=data.drop_duplicates()",
"execution_count": 112,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "b8fdc1af",
"cell_type": "code",
"source": "data[data.duplicated()]",
"execution_count": 113,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 113,
"data": {
"text/plain": "Empty DataFrame\nColumns: [Unnamed: 0, Ozone, Solar.R, Wind, Temp C, Month, Day, Year, Temp, Weather]\nIndex: []",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "8ad10cf3",
"cell_type": "code",
"source": "data=data.drop(data.columns[0], axis=1)",
"execution_count": 114,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "96e4c1a6",
"cell_type": "code",
"source": " data",
"execution_count": 102,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 102,
"data": {
"text/plain": " Ozone Solar.R Wind Temp C Month Day Year Temp Weather\n0 41.0 190.0 7.4 67 5.0 1 2010 67 S\n1 36.0 118.0 8.0 72 5.0 2 2010 72 C\n2 12.0 149.0 12.6 74 5.0 3 2010 74 PS\n3 18.0 313.0 11.5 62 5.0 4 2010 62 S\n4 NaN NaN 14.3 56 5.0 5 2010 56 S\n.. ... ... ... ... ... ... ... ... ...\n153 41.0 190.0 7.4 67 5.0 1 2010 67 C\n154 30.0 193.0 6.9 70 9.0 26 2010 70 PS\n155 NaN 145.0 13.2 77 9.0 27 2010 77 S\n156 14.0 191.0 14.3 75 9.0 28 2010 75 S\n157 18.0 131.0 8.0 76 9.0 29 2010 76 C\n\n[158 rows x 9 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Ozone</th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>36.0</td>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5.0</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>12.0</td>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5.0</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>18.0</td>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5.0</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>41.0</td>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>30.0</td>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9.0</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>NaN</td>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9.0</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>14.0</td>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9.0</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>18.0</td>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9.0</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 9 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"id": "84e11bb9",
"cell_type": "code",
"source": "data=data.rename({'Solar.R':'Solar'},axis=1)",
"execution_count": 123,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "e906f90d",
"cell_type": "code",
"source": "data=data.drop(data.columns[0],axis=1)\n",
"execution_count": 121,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "c18680c1",
"cell_type": "code",
"source": "data",
"execution_count": 122,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 122,
"data": {
"text/plain": " Solar.R Wind Temp C Month Day Year Temp Weather\n0 190.0 7.4 67 5.0 1 2010 67 S\n1 118.0 8.0 72 5.0 2 2010 72 C\n2 149.0 12.6 74 5.0 3 2010 74 PS\n3 313.0 11.5 62 5.0 4 2010 62 S\n4 NaN 14.3 56 5.0 5 2010 56 S\n.. ... ... ... ... ... ... ... ...\n153 190.0 7.4 67 5.0 1 2010 67 C\n154 193.0 6.9 70 9.0 26 2010 70 PS\n155 145.0 13.2 77 9.0 27 2010 77 S\n156 191.0 14.3 75 9.0 28 2010 75 S\n157 131.0 8.0 76 9.0 29 2010 76 C\n\n[158 rows x 8 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Solar.R</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5.0</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5.0</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5.0</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9.0</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9.0</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9.0</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9.0</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 8 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": false
},
"id": "be4917f6",
"cell_type": "code",
"source": "data['Weather'].value_counts().plot.bar()",
"execution_count": 59,
"outputs": [
{
"data": {
"text/plain": "<AxesSubplot:>"
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD7CAYAAABzGc+QAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAMDklEQVR4nO3cb4hl913H8ffH3ZaU1tIsmV2XpnHUrtVQyR/GEAkW2jWSGnHXB9EW1EEWF0ElgiCr4gOfrU+kCiIuTXXE2ja0hl3a0naZGP9ASTPbxqZxU7aENA3Z7k5Tq2kU08SvD+asXWZn9965M3cm3+z7BeGc87vn5n7hwpuTM+cmVYUkqZ/v2e4BJEmTMeCS1JQBl6SmDLgkNWXAJakpAy5JTe3cyg+77rrranZ2dis/UpLaO3Xq1Deqamb1+pYGfHZ2lqWlpa38SElqL8lX11r3FookNTVWwJO8KclHkzyR5HSSn0iyK8nJJGeG7bXTHlaS9F3jXoH/KfCpqvoR4CbgNHAEWKyqfcDicCxJ2iIjA57kjcA7gPsAqurFqvoWcABYGE5bAA5OZ0RJ0lrGuQL/QWAZ+KskX0jy/iSvB/ZU1VmAYbt7inNKklYZJ+A7gVuBv6iqW4AXWMftkiSHkywlWVpeXp5wTEnSauME/Bngmap6eDj+KCtBP5dkL8CwPb/Wm6vqWFXNVdXczMwljzFKkiY0MuBV9XXga0neNiztB/4NOAHMD2vzwPGpTChJWtO4P+T5LeCDSV4LPAn8Kivxvz/JIeBp4J7pjDi52SOf2O4Rpuqpo3dv9wiSttFYAa+qR4G5NV7av6nTSJLG5i8xJakpAy5JTRlwSWrKgEtSUwZckpoy4JLUlAGXpKYMuCQ1ZcAlqSkDLklNGXBJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6SmDLgkNWXAJakpAy5JTe0c56QkTwHPAy8DL1XVXJJdwEeAWeAp4Beq6t+nM6YkabX1XIG/s6purqq54fgIsFhV+4DF4ViStEU2cgvlALAw7C8ABzc8jSRpbOMGvIDPJDmV5PCwtqeqzgIM293TGFCStLax7oEDd1TVs0l2AyeTPDHuBwzBPwxwww03TDCiJGktY12BV9Wzw/Y88ABwG3AuyV6AYXv+Mu89VlVzVTU3MzOzOVNLkkYHPMnrk3zvhX3gp4EvASeA+eG0eeD4tIaUJF1qnFsoe4AHklw4/++q6lNJHgHuT3IIeBq4Z3pjSpJWGxnwqnoSuGmN9eeA/dMYSpo98ontHmGqnjp693aPoFcBf4kpSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6Smxv0pvSSNzcdAt4ZX4JLUlAGXpKYMuCQ1ZcAlqSkDLklNGXBJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6SmDLgkNTV2wJPsSPKFJB8fjnclOZnkzLC9dnpjSpJWW88V+L3A6YuOjwCLVbUPWByOJUlbZKyAJ7keuBt4/0XLB4CFYX8BOLipk0mSrmjcK/D3Ab8L/O9Fa3uq6izAsN29uaNJkq5kZMCT/CxwvqpOTfIBSQ4nWUqytLy8PMm/QpK0hnGuwO8Afi7JU8CHgXcl+VvgXJK9AMP2/FpvrqpjVTVXVXMzMzObNLYkaWTAq+r3qur6qpoF3gM8WFW/BJwA5ofT5oHjU5tSknSJjTwHfhS4M8kZ4M7hWJK0RXau5+Sqegh4aNh/Dti/+SNJksbhLzElqSkDLklNGXBJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6SmDLgkNWXAJakpAy5JTRlwSWrKgEtSUwZckpoy4JLUlAGXpKYMuCQ1ZcAlqSkDLklNjQx4kmuSfC7JvyZ5PMkfDeu7kpxMcmbYXjv9cSVJF4xzBf4/wLuq6ibgZuCuJLcDR4DFqtoHLA7HkqQtMjLgteLbw+Frhn8KOAAsDOsLwMFpDChJWttY98CT7EjyKHAeOFlVDwN7quoswLDdPbUpJUmXGCvgVfVyVd0MXA/cluTt435AksNJlpIsLS8vTzimJGm1dT2FUlXfAh4C7gLOJdkLMGzPX+Y9x6pqrqrmZmZmNjatJOn/jfMUykySNw37rwN+CngCOAHMD6fNA8enNKMkaQ07xzhnL7CQZAcrwb+/qj6e5LPA/UkOAU8D90xxTknSKiMDXlVfBG5ZY/05YP80hpIkjeYvMSWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6SmDLgkNWXAJakpAy5JTRlwSWrKgEtSUwZckpoy4JLUlAGXpKYMuCQ1ZcAlqSkDLklNGXBJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU2NDHiStyT5hySnkzye5N5hfVeSk0nODNtrpz+uJOmCca7AXwJ+p6p+FLgd+I0kNwJHgMWq2gcsDseSpC0yMuBVdbaqPj/sPw+cBt4MHAAWhtMWgINTmlGStIZ13QNPMgvcAjwM7Kmqs7ASeWD3pk8nSbqssQOe5A3Ax4Dfrqr/XMf7DidZSrK0vLw8yYySpDWMFfAkr2El3h+sqr8fls8l2Tu8vhc4v9Z7q+pYVc1V1dzMzMxmzCxJYrynUALcB5yuqj+56KUTwPywPw8c3/zxJEmXs3OMc+4Afhl4LMmjw9rvA0eB+5McAp4G7pnKhJKkNY0MeFX9C5DLvLx/c8eRJI3LX2JKUlMGXJKaMuCS1JQBl6SmDLgkNWXAJakpAy5JTRlwSWrKgEtSUwZckpoy4JLUlAGXpKYMuCQ1ZcAlqSkDLklNGXBJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1NTIgCf5QJLzSb500dquJCeTnBm21053TEnSauNcgf81cNeqtSPAYlXtAxaHY0nSFhoZ8Kr6J+Cbq5YPAAvD/gJwcHPHkiSNMuk98D1VdRZg2O6+3IlJDidZSrK0vLw84cdJklab+h8xq+pYVc1V1dzMzMy0P06SrhqTBvxckr0Aw/b85o0kSRrHpAE/AcwP+/PA8c0ZR5I0rnEeI/wQ8FngbUmeSXIIOArcmeQMcOdwLEnaQjtHnVBV773MS/s3eRZJ0jr4S0xJasqAS1JTBlySmjLgktSUAZekpgy4JDVlwCWpKQMuSU0ZcElqyoBLUlMGXJKaMuCS1JQBl6SmDLgkNWXAJakpAy5JTRlwSWrKgEtSUwZckpoy4JLUlAGXpKYMuCQ1ZcAlqSkDLklNGXBJasqAS1JTGwp4kruSfDnJV5Ic2ayhJEmjTRzwJDuAPwfeDdwIvDfJjZs1mCTpyjZyBX4b8JWqerKqXgQ+DBzYnLEkSaNsJOBvBr520fEzw5okaQvs3MB7s8ZaXXJSchg4PBx+O8mXN/CZr3TXAd/Yqg/LH2/VJ10V/O56e7V/f9+/1uJGAv4M8JaLjq8Hnl19UlUdA45t4HPaSLJUVXPbPYfWz++ut6v1+9vILZRHgH1JfiDJa4H3ACc2ZyxJ0igTX4FX1UtJfhP4NLAD+EBVPb5pk0mSrmgjt1Coqk8Cn9ykWV4NropbRa9Sfne9XZXfX6ou+bujJKkBf0ovSU0ZcElqyoBPIMmPJ/m+i45/JcnxJH+WZNd2zqbRkrw1yR1rrP9kkh/ajpmkSRjwyfwl8CJAkncAR4G/Af6Dq/SPKc28D3h+jfX/Hl7TK5gXUN9lwCezo6q+Oez/InCsqj5WVX8IvHUb59J4Zqvqi6sXq2oJmN36cbROXkANDPhkdiS58AjmfuDBi17b0KOZ2hLXXOG1123ZFJqUF1ADAz6ZDwH/mOQ4K//Z/c+wcm+VlasAvbI9kuTXVi8mOQSc2oZ5tD5eQA18DnxCSW4H9gKfqaoXhrUfBt5QVZ/f1uF0RUn2AA+w8p/hF4I9B7wW+Pmq+vp2zabRkvwB8DOs/M+rbgBuraoaLqAWquqSP1C/WhlwXbWSvBN4+3D4eFU9eKXz9crhBdQKAy6plSTXAL/Oyv3ux4D7quql7Z1qexhwSa0k+QjwHVb+9vRu4KtVde/2TrU9DLikVpI8VlU/NuzvBD5XVbdu81jbwqdQJHXznQs7V+utkwu8ApfUSpKXgRcuHLLy7P5/DftVVW/crtm2mgGXpKa8hSJJTRlwSWrKgEtSUwZckpoy4JLU1P8B9mDMRIWYoZcAAAAASUVORK5CYII=\n",
"text/plain": "<Figure size 432x288 with 1 Axes>"
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"trusted": true
},
"id": "f3fbaf5d",
"cell_type": "code",
"source": "data",
"execution_count": 124,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 124,
"data": {
"text/plain": " Solar Wind Temp C Month Day Year Temp Weather\n0 190.0 7.4 67 5.0 1 2010 67 S\n1 118.0 8.0 72 5.0 2 2010 72 C\n2 149.0 12.6 74 5.0 3 2010 74 PS\n3 313.0 11.5 62 5.0 4 2010 62 S\n4 NaN 14.3 56 5.0 5 2010 56 S\n.. ... ... ... ... ... ... ... ...\n153 190.0 7.4 67 5.0 1 2010 67 C\n154 193.0 6.9 70 9.0 26 2010 70 PS\n155 145.0 13.2 77 9.0 27 2010 77 S\n156 191.0 14.3 75 9.0 28 2010 75 S\n157 131.0 8.0 76 9.0 29 2010 76 C\n\n[158 rows x 8 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Solar</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>S</td>\n </tr>\n <tr>\n <th>1</th>\n <td>118.0</td>\n <td>8.0</td>\n <td>72</td>\n <td>5.0</td>\n <td>2</td>\n <td>2010</td>\n <td>72</td>\n <td>C</td>\n </tr>\n <tr>\n <th>2</th>\n <td>149.0</td>\n <td>12.6</td>\n <td>74</td>\n <td>5.0</td>\n <td>3</td>\n <td>2010</td>\n <td>74</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>3</th>\n <td>313.0</td>\n <td>11.5</td>\n <td>62</td>\n <td>5.0</td>\n <td>4</td>\n <td>2010</td>\n <td>62</td>\n <td>S</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>153</th>\n <td>190.0</td>\n <td>7.4</td>\n <td>67</td>\n <td>5.0</td>\n <td>1</td>\n <td>2010</td>\n <td>67</td>\n <td>C</td>\n </tr>\n <tr>\n <th>154</th>\n <td>193.0</td>\n <td>6.9</td>\n <td>70</td>\n <td>9.0</td>\n <td>26</td>\n <td>2010</td>\n <td>70</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>155</th>\n <td>145.0</td>\n <td>13.2</td>\n <td>77</td>\n <td>9.0</td>\n <td>27</td>\n <td>2010</td>\n <td>77</td>\n <td>S</td>\n </tr>\n <tr>\n <th>156</th>\n <td>191.0</td>\n <td>14.3</td>\n <td>75</td>\n <td>9.0</td>\n <td>28</td>\n <td>2010</td>\n <td>75</td>\n <td>S</td>\n </tr>\n <tr>\n <th>157</th>\n <td>131.0</td>\n <td>8.0</td>\n <td>76</td>\n <td>9.0</td>\n <td>29</td>\n <td>2010</td>\n <td>76</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n<p>158 rows × 8 columns</p>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "data[data.isnull().any(axis=1)]",
"execution_count": 127,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 127,
"data": {
"text/plain": " Solar Wind Temp C Month Day Year Temp Weather\n4 NaN 14.3 56 5.0 5 2010 56 S\n5 NaN 14.9 66 5.0 6 2010 66 C\n10 NaN 6.9 C 5.0 11 2010 74 C\n23 92.0 12.0 61 NaN 24 2010 61 C\n26 NaN 8.0 57 5.0 27 2010 57 PS\n87 82.0 12.0 86 7.0 27 2010 86 NaN\n93 24.0 13.8 81 8.0 2 2010 81 NaN\n95 NaN 6.9 86 8.0 4 2010 86 NaN\n96 NaN 7.4 85 8.0 5 2010 85 S\n97 NaN 4.6 87 8.0 6 2010 87 C",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Solar</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>5</th>\n <td>NaN</td>\n <td>14.9</td>\n <td>66</td>\n <td>5.0</td>\n <td>6</td>\n <td>2010</td>\n <td>66</td>\n <td>C</td>\n </tr>\n <tr>\n <th>10</th>\n <td>NaN</td>\n <td>6.9</td>\n <td>C</td>\n <td>5.0</td>\n <td>11</td>\n <td>2010</td>\n <td>74</td>\n <td>C</td>\n </tr>\n <tr>\n <th>23</th>\n <td>92.0</td>\n <td>12.0</td>\n <td>61</td>\n <td>NaN</td>\n <td>24</td>\n <td>2010</td>\n <td>61</td>\n <td>C</td>\n </tr>\n <tr>\n <th>26</th>\n <td>NaN</td>\n <td>8.0</td>\n <td>57</td>\n <td>5.0</td>\n <td>27</td>\n <td>2010</td>\n <td>57</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>87</th>\n <td>82.0</td>\n <td>12.0</td>\n <td>86</td>\n <td>7.0</td>\n <td>27</td>\n <td>2010</td>\n <td>86</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>93</th>\n <td>24.0</td>\n <td>13.8</td>\n <td>81</td>\n <td>8.0</td>\n <td>2</td>\n <td>2010</td>\n <td>81</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>95</th>\n <td>NaN</td>\n <td>6.9</td>\n <td>86</td>\n <td>8.0</td>\n <td>4</td>\n <td>2010</td>\n <td>86</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>96</th>\n <td>NaN</td>\n <td>7.4</td>\n <td>85</td>\n <td>8.0</td>\n <td>5</td>\n <td>2010</td>\n <td>85</td>\n <td>S</td>\n </tr>\n <tr>\n <th>97</th>\n <td>NaN</td>\n <td>4.6</td>\n <td>87</td>\n <td>8.0</td>\n <td>6</td>\n <td>2010</td>\n <td>87</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "data[['Solar']].median()",
"execution_count": 136,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 136,
"data": {
"text/plain": "Solar 82.0\ndtype: float64"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "data['Solar']=data['Solar'].fillna(82)",
"execution_count": 139,
"outputs": [
{
"output_type": "stream",
"text": "<ipython-input-139-3ea0e88904dc>:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n data['Solar']=data['Solar'].fillna(82)\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "data",
"execution_count": 140,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 140,
"data": {
"text/plain": " Solar Wind Temp C Month Day Year Temp Weather\n4 82.0 14.3 56 5.0 5 2010 56 S\n5 82.0 14.9 66 5.0 6 2010 66 C\n10 82.0 6.9 C 5.0 11 2010 74 C\n23 92.0 12.0 61 NaN 24 2010 61 C\n26 82.0 8.0 57 5.0 27 2010 57 PS\n87 82.0 12.0 86 7.0 27 2010 86 NaN\n93 24.0 13.8 81 8.0 2 2010 81 NaN\n95 82.0 6.9 86 8.0 4 2010 86 NaN\n96 82.0 7.4 85 8.0 5 2010 85 S\n97 82.0 4.6 87 8.0 6 2010 87 C",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Solar</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>82.0</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>5</th>\n <td>82.0</td>\n <td>14.9</td>\n <td>66</td>\n <td>5.0</td>\n <td>6</td>\n <td>2010</td>\n <td>66</td>\n <td>C</td>\n </tr>\n <tr>\n <th>10</th>\n <td>82.0</td>\n <td>6.9</td>\n <td>C</td>\n <td>5.0</td>\n <td>11</td>\n <td>2010</td>\n <td>74</td>\n <td>C</td>\n </tr>\n <tr>\n <th>23</th>\n <td>92.0</td>\n <td>12.0</td>\n <td>61</td>\n <td>NaN</td>\n <td>24</td>\n <td>2010</td>\n <td>61</td>\n <td>C</td>\n </tr>\n <tr>\n <th>26</th>\n <td>82.0</td>\n <td>8.0</td>\n <td>57</td>\n <td>5.0</td>\n <td>27</td>\n <td>2010</td>\n <td>57</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>87</th>\n <td>82.0</td>\n <td>12.0</td>\n <td>86</td>\n <td>7.0</td>\n <td>27</td>\n <td>2010</td>\n <td>86</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>93</th>\n <td>24.0</td>\n <td>13.8</td>\n <td>81</td>\n <td>8.0</td>\n <td>2</td>\n <td>2010</td>\n <td>81</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>95</th>\n <td>82.0</td>\n <td>6.9</td>\n <td>86</td>\n <td>8.0</td>\n <td>4</td>\n <td>2010</td>\n <td>86</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>96</th>\n <td>82.0</td>\n <td>7.4</td>\n <td>85</td>\n <td>8.0</td>\n <td>5</td>\n <td>2010</td>\n <td>85</td>\n <td>S</td>\n </tr>\n <tr>\n <th>97</th>\n <td>82.0</td>\n <td>4.6</td>\n <td>87</td>\n <td>8.0</td>\n <td>6</td>\n <td>2010</td>\n <td>87</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "data.fillna(data.mode().iloc[0])",
"execution_count": 142,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 142,
"data": {
"text/plain": " Solar Wind Temp C Month Day Year Temp Weather\n4 82.0 14.3 56 5.0 5 2010 56 S\n5 82.0 14.9 66 5.0 6 2010 66 C\n10 82.0 6.9 C 5.0 11 2010 74 C\n23 92.0 12.0 61 5.0 24 2010 61 C\n26 82.0 8.0 57 5.0 27 2010 57 PS\n87 82.0 12.0 86 7.0 27 2010 86 C\n93 24.0 13.8 81 8.0 2 2010 81 C\n95 82.0 6.9 86 8.0 4 2010 86 C\n96 82.0 7.4 85 8.0 5 2010 85 S\n97 82.0 4.6 87 8.0 6 2010 87 C",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Solar</th>\n <th>Wind</th>\n <th>Temp C</th>\n <th>Month</th>\n <th>Day</th>\n <th>Year</th>\n <th>Temp</th>\n <th>Weather</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4</th>\n <td>82.0</td>\n <td>14.3</td>\n <td>56</td>\n <td>5.0</td>\n <td>5</td>\n <td>2010</td>\n <td>56</td>\n <td>S</td>\n </tr>\n <tr>\n <th>5</th>\n <td>82.0</td>\n <td>14.9</td>\n <td>66</td>\n <td>5.0</td>\n <td>6</td>\n <td>2010</td>\n <td>66</td>\n <td>C</td>\n </tr>\n <tr>\n <th>10</th>\n <td>82.0</td>\n <td>6.9</td>\n <td>C</td>\n <td>5.0</td>\n <td>11</td>\n <td>2010</td>\n <td>74</td>\n <td>C</td>\n </tr>\n <tr>\n <th>23</th>\n <td>92.0</td>\n <td>12.0</td>\n <td>61</td>\n <td>5.0</td>\n <td>24</td>\n <td>2010</td>\n <td>61</td>\n <td>C</td>\n </tr>\n <tr>\n <th>26</th>\n <td>82.0</td>\n <td>8.0</td>\n <td>57</td>\n <td>5.0</td>\n <td>27</td>\n <td>2010</td>\n <td>57</td>\n <td>PS</td>\n </tr>\n <tr>\n <th>87</th>\n <td>82.0</td>\n <td>12.0</td>\n <td>86</td>\n <td>7.0</td>\n <td>27</td>\n <td>2010</td>\n <td>86</td>\n <td>C</td>\n </tr>\n <tr>\n <th>93</th>\n <td>24.0</td>\n <td>13.8</td>\n <td>81</td>\n <td>8.0</td>\n <td>2</td>\n <td>2010</td>\n <td>81</td>\n <td>C</td>\n </tr>\n <tr>\n <th>95</th>\n <td>82.0</td>\n <td>6.9</td>\n <td>86</td>\n <td>8.0</td>\n <td>4</td>\n <td>2010</td>\n <td>86</td>\n <td>C</td>\n </tr>\n <tr>\n <th>96</th>\n <td>82.0</td>\n <td>7.4</td>\n <td>85</td>\n <td>8.0</td>\n <td>5</td>\n <td>2010</td>\n <td>85</td>\n <td>S</td>\n </tr>\n <tr>\n <th>97</th>\n <td>82.0</td>\n <td>4.6</td>\n <td>87</td>\n <td>8.0</td>\n <td>6</td>\n <td>2010</td>\n <td>87</td>\n <td>C</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import sweetviz as sv\nsweet_report= sv.analyze(data)\nsweet_report.show_html(\"weather\")",
"execution_count": 146,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": " | | [ 0%] 00:00 ->…",
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "45029316f9de4687951b5af340855ad0"
}
},
"metadata": {}
},
{
"output_type": "stream",
"text": "Report weather was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pip install sweetviz",
"execution_count": 145,
"outputs": [
{
"output_type": "stream",
"text": "Collecting sweetviz\n Downloading sweetviz-2.1.2-py3-none-any.whl (15.1 MB)\nRequirement already satisfied: jinja2>=2.11.1 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (2.11.3)\nRequirement already satisfied: tqdm>=4.43.0 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (4.59.0)\nRequirement already satisfied: numpy>=1.16.0 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (1.20.1)\nRequirement already satisfied: scipy>=1.3.2 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (1.6.2)\nCollecting importlib-resources>=1.2.0\n Downloading importlib_resources-5.2.0-py3-none-any.whl (27 kB)\nRequirement already satisfied: pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (1.2.4)\nRequirement already satisfied: matplotlib>=3.1.3 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from sweetviz) (3.3.4)\nRequirement already satisfied: zipp>=3.1.0 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from importlib-resources>=1.2.0->sweetviz) (3.4.1)\nRequirement already satisfied: MarkupSafe>=0.23 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from jinja2>=2.11.1->sweetviz) (1.1.1)\nRequirement already satisfied: pillow>=6.2.0 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from matplotlib>=3.1.3->sweetviz) (8.2.0)\nRequirement already satisfied: python-dateutil>=2.1 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from matplotlib>=3.1.3->sweetviz) (2.8.1)\nRequirement already satisfied: cycler>=0.10 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from matplotlib>=3.1.3->sweetviz) (0.10.0)\nRequirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from matplotlib>=3.1.3->sweetviz) (2.4.7)\nRequirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from matplotlib>=3.1.3->sweetviz) (1.3.1)\nRequirement already satisfied: six in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from cycler>=0.10->matplotlib>=3.1.3->sweetviz) (1.15.0)\nRequirement already satisfied: pytz>=2017.3 in c:\\users\\pushkar\\anaconda\\lib\\site-packages (from pandas!=1.0.0,!=1.0.1,!=1.0.2,>=0.25.3->sweetviz) (2021.1)\nInstalling collected packages: importlib-resources, sweetviz\nSuccessfully installed importlib-resources-5.2.0 sweetviz-2.1.2\nNote: you may need to restart the kernel to use updated packages.\n",
"name": "stdout"
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.8",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"varInspector": {
"window_display": false,
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"library": "var_list.py",
"delete_cmd_prefix": "del ",
"delete_cmd_postfix": "",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"library": "var_list.r",
"delete_cmd_prefix": "rm(",
"delete_cmd_postfix": ") ",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
]
},
"gist": {
"id": "",
"data": {
"description": "data clean ",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment