Skip to content

Instantly share code, notes, and snippets.

@pree62
Created August 3, 2023 10:24
Show Gist options
  • Save pree62/c01d4fef908af3276fb8f60c4861ff68 to your computer and use it in GitHub Desktop.
Save pree62/c01d4fef908af3276fb8f60c4861ff68 to your computer and use it in GitHub Desktop.
!excelR/assignments/Gists/RF - Company Data.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"id": "it2UKlhX85yD"
},
"cell_type": "markdown",
"source": "# COMPANY DATA RANDOM FOREST\n"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:52:42.444233Z",
"start_time": "2023-08-01T10:52:39.725109Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets \nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn import tree\nfrom sklearn.metrics import classification_report\nfrom sklearn import preprocessing ",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:52:44.639883Z",
"start_time": "2023-08-01T10:52:44.511644Z"
},
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "XSFakjvm8-4z",
"outputId": "6ffbb093-dd3c-4609-ce1a-3f91e2074a6c",
"trusted": true
},
"cell_type": "code",
"source": "data = pd.read_csv(\"Company_Data.csv\")\ndata",
"execution_count": 4,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sales</th>\n <th>CompPrice</th>\n <th>Income</th>\n <th>Advertising</th>\n <th>Population</th>\n <th>Price</th>\n <th>ShelveLoc</th>\n <th>Age</th>\n <th>Education</th>\n <th>Urban</th>\n <th>US</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>9.50</td>\n <td>138</td>\n <td>73</td>\n <td>11</td>\n <td>276</td>\n <td>120</td>\n <td>Bad</td>\n <td>42</td>\n <td>17</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>11.22</td>\n <td>111</td>\n <td>48</td>\n <td>16</td>\n <td>260</td>\n <td>83</td>\n <td>Good</td>\n <td>65</td>\n <td>10</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10.06</td>\n <td>113</td>\n <td>35</td>\n <td>10</td>\n <td>269</td>\n <td>80</td>\n <td>Medium</td>\n <td>59</td>\n <td>12</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>7.40</td>\n <td>117</td>\n <td>100</td>\n <td>4</td>\n <td>466</td>\n <td>97</td>\n <td>Medium</td>\n <td>55</td>\n <td>14</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4.15</td>\n <td>141</td>\n <td>64</td>\n <td>3</td>\n <td>340</td>\n <td>128</td>\n <td>Bad</td>\n <td>38</td>\n <td>13</td>\n <td>Yes</td>\n <td>No</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>395</th>\n <td>12.57</td>\n <td>138</td>\n <td>108</td>\n <td>17</td>\n <td>203</td>\n <td>128</td>\n <td>Good</td>\n <td>33</td>\n <td>14</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>396</th>\n <td>6.14</td>\n <td>139</td>\n <td>23</td>\n <td>3</td>\n <td>37</td>\n 
<td>120</td>\n <td>Medium</td>\n <td>55</td>\n <td>11</td>\n <td>No</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>397</th>\n <td>7.41</td>\n <td>162</td>\n <td>26</td>\n <td>12</td>\n <td>368</td>\n <td>159</td>\n <td>Medium</td>\n <td>40</td>\n <td>18</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>398</th>\n <td>5.94</td>\n <td>100</td>\n <td>79</td>\n <td>7</td>\n <td>284</td>\n <td>95</td>\n <td>Bad</td>\n <td>50</td>\n <td>12</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>399</th>\n <td>9.71</td>\n <td>134</td>\n <td>37</td>\n <td>0</td>\n <td>27</td>\n <td>120</td>\n <td>Good</td>\n <td>49</td>\n <td>16</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n </tbody>\n</table>\n<p>400 rows × 11 columns</p>\n</div>",
"text/plain": " Sales CompPrice Income Advertising Population Price ShelveLoc Age \\\n0 9.50 138 73 11 276 120 Bad 42 \n1 11.22 111 48 16 260 83 Good 65 \n2 10.06 113 35 10 269 80 Medium 59 \n3 7.40 117 100 4 466 97 Medium 55 \n4 4.15 141 64 3 340 128 Bad 38 \n.. ... ... ... ... ... ... ... ... \n395 12.57 138 108 17 203 128 Good 33 \n396 6.14 139 23 3 37 120 Medium 55 \n397 7.41 162 26 12 368 159 Medium 40 \n398 5.94 100 79 7 284 95 Bad 50 \n399 9.71 134 37 0 27 120 Good 49 \n\n Education Urban US \n0 17 Yes Yes \n1 10 Yes Yes \n2 12 Yes Yes \n3 14 Yes Yes \n4 13 Yes No \n.. ... ... ... \n395 14 Yes Yes \n396 11 No Yes \n397 18 Yes Yes \n398 12 Yes Yes \n399 16 Yes Yes \n\n[400 rows x 11 columns]"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:52:49.417704Z",
"start_time": "2023-08-01T10:52:49.338573Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aGYk0mUo9Fmu",
"outputId": "3eac1732-a417-49d0-dd06-18eb4de425bc",
"trusted": true
},
"cell_type": "code",
"source": "#Converting Numerical data to Categorical Data using Bins in pd.cut\ncat_sales = pd.cut(data['Sales'],bins=[-1,7.6,16.270],labels=['low sales','high sales'])\ncat_sales.describe()",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "count 400\nunique 2\ntop low sales\nfreq 211\nName: Sales, dtype: object"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:07.750825Z",
"start_time": "2023-08-01T10:53:07.670788Z"
},
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "2YHJAepo9H0k",
"outputId": "775a9ee3-cc5c-46d4-9d1d-e1c530ccfa43",
"trusted": true
},
"cell_type": "code",
"source": "data['Sales']=cat_sales\ndata",
"execution_count": 6,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sales</th>\n <th>CompPrice</th>\n <th>Income</th>\n <th>Advertising</th>\n <th>Population</th>\n <th>Price</th>\n <th>ShelveLoc</th>\n <th>Age</th>\n <th>Education</th>\n <th>Urban</th>\n <th>US</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>high sales</td>\n <td>138</td>\n <td>73</td>\n <td>11</td>\n <td>276</td>\n <td>120</td>\n <td>Bad</td>\n <td>42</td>\n <td>17</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>1</th>\n <td>high sales</td>\n <td>111</td>\n <td>48</td>\n <td>16</td>\n <td>260</td>\n <td>83</td>\n <td>Good</td>\n <td>65</td>\n <td>10</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>2</th>\n <td>high sales</td>\n <td>113</td>\n <td>35</td>\n <td>10</td>\n <td>269</td>\n <td>80</td>\n <td>Medium</td>\n <td>59</td>\n <td>12</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>3</th>\n <td>low sales</td>\n <td>117</td>\n <td>100</td>\n <td>4</td>\n <td>466</td>\n <td>97</td>\n <td>Medium</td>\n <td>55</td>\n <td>14</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>4</th>\n <td>low sales</td>\n <td>141</td>\n <td>64</td>\n <td>3</td>\n <td>340</td>\n <td>128</td>\n <td>Bad</td>\n <td>38</td>\n <td>13</td>\n <td>Yes</td>\n <td>No</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>395</th>\n <td>high sales</td>\n <td>138</td>\n <td>108</td>\n <td>17</td>\n <td>203</td>\n <td>128</td>\n <td>Good</td>\n <td>33</td>\n <td>14</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>396</th>\n <td>low sales</td>\n <td>139</td>\n 
<td>23</td>\n <td>3</td>\n <td>37</td>\n <td>120</td>\n <td>Medium</td>\n <td>55</td>\n <td>11</td>\n <td>No</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>397</th>\n <td>low sales</td>\n <td>162</td>\n <td>26</td>\n <td>12</td>\n <td>368</td>\n <td>159</td>\n <td>Medium</td>\n <td>40</td>\n <td>18</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>398</th>\n <td>low sales</td>\n <td>100</td>\n <td>79</td>\n <td>7</td>\n <td>284</td>\n <td>95</td>\n <td>Bad</td>\n <td>50</td>\n <td>12</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n <tr>\n <th>399</th>\n <td>high sales</td>\n <td>134</td>\n <td>37</td>\n <td>0</td>\n <td>27</td>\n <td>120</td>\n <td>Good</td>\n <td>49</td>\n <td>16</td>\n <td>Yes</td>\n <td>Yes</td>\n </tr>\n </tbody>\n</table>\n<p>400 rows × 11 columns</p>\n</div>",
"text/plain": " Sales CompPrice Income Advertising Population Price ShelveLoc \\\n0 high sales 138 73 11 276 120 Bad \n1 high sales 111 48 16 260 83 Good \n2 high sales 113 35 10 269 80 Medium \n3 low sales 117 100 4 466 97 Medium \n4 low sales 141 64 3 340 128 Bad \n.. ... ... ... ... ... ... ... \n395 high sales 138 108 17 203 128 Good \n396 low sales 139 23 3 37 120 Medium \n397 low sales 162 26 12 368 159 Medium \n398 low sales 100 79 7 284 95 Bad \n399 high sales 134 37 0 27 120 Good \n\n Age Education Urban US \n0 42 17 Yes Yes \n1 65 10 Yes Yes \n2 59 12 Yes Yes \n3 55 14 Yes Yes \n4 38 13 Yes No \n.. ... ... ... ... \n395 33 14 Yes Yes \n396 55 11 No Yes \n397 40 18 Yes Yes \n398 50 12 Yes Yes \n399 49 16 Yes Yes \n\n[400 rows x 11 columns]"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:18.700292Z",
"start_time": "2023-08-01T10:53:18.645567Z"
},
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "2li8r-lB9JyZ",
"outputId": "f94207fd-fa09-4f39-d291-fe46cf73bac9",
"trusted": true
},
"cell_type": "code",
"source": "label_encoder = preprocessing.LabelEncoder()\ndata['Urban']= label_encoder.fit_transform(data['Urban'])\ndata['US']= label_encoder.fit_transform(data['US'])\ndata",
"execution_count": 7,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Sales</th>\n <th>CompPrice</th>\n <th>Income</th>\n <th>Advertising</th>\n <th>Population</th>\n <th>Price</th>\n <th>ShelveLoc</th>\n <th>Age</th>\n <th>Education</th>\n <th>Urban</th>\n <th>US</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>high sales</td>\n <td>138</td>\n <td>73</td>\n <td>11</td>\n <td>276</td>\n <td>120</td>\n <td>Bad</td>\n <td>42</td>\n <td>17</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>high sales</td>\n <td>111</td>\n <td>48</td>\n <td>16</td>\n <td>260</td>\n <td>83</td>\n <td>Good</td>\n <td>65</td>\n <td>10</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>high sales</td>\n <td>113</td>\n <td>35</td>\n <td>10</td>\n <td>269</td>\n <td>80</td>\n <td>Medium</td>\n <td>59</td>\n <td>12</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>low sales</td>\n <td>117</td>\n <td>100</td>\n <td>4</td>\n <td>466</td>\n <td>97</td>\n <td>Medium</td>\n <td>55</td>\n <td>14</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>low sales</td>\n <td>141</td>\n <td>64</td>\n <td>3</td>\n <td>340</td>\n <td>128</td>\n <td>Bad</td>\n <td>38</td>\n <td>13</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>395</th>\n <td>high sales</td>\n <td>138</td>\n <td>108</td>\n <td>17</td>\n <td>203</td>\n <td>128</td>\n <td>Good</td>\n <td>33</td>\n <td>14</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>396</th>\n <td>low sales</td>\n <td>139</td>\n <td>23</td>\n <td>3</td>\n 
<td>37</td>\n <td>120</td>\n <td>Medium</td>\n <td>55</td>\n <td>11</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>397</th>\n <td>low sales</td>\n <td>162</td>\n <td>26</td>\n <td>12</td>\n <td>368</td>\n <td>159</td>\n <td>Medium</td>\n <td>40</td>\n <td>18</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>398</th>\n <td>low sales</td>\n <td>100</td>\n <td>79</td>\n <td>7</td>\n <td>284</td>\n <td>95</td>\n <td>Bad</td>\n <td>50</td>\n <td>12</td>\n <td>1</td>\n <td>1</td>\n </tr>\n <tr>\n <th>399</th>\n <td>high sales</td>\n <td>134</td>\n <td>37</td>\n <td>0</td>\n <td>27</td>\n <td>120</td>\n <td>Good</td>\n <td>49</td>\n <td>16</td>\n <td>1</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n<p>400 rows × 11 columns</p>\n</div>",
"text/plain": " Sales CompPrice Income Advertising Population Price ShelveLoc \\\n0 high sales 138 73 11 276 120 Bad \n1 high sales 111 48 16 260 83 Good \n2 high sales 113 35 10 269 80 Medium \n3 low sales 117 100 4 466 97 Medium \n4 low sales 141 64 3 340 128 Bad \n.. ... ... ... ... ... ... ... \n395 high sales 138 108 17 203 128 Good \n396 low sales 139 23 3 37 120 Medium \n397 low sales 162 26 12 368 159 Medium \n398 low sales 100 79 7 284 95 Bad \n399 high sales 134 37 0 27 120 Good \n\n Age Education Urban US \n0 42 17 1 1 \n1 65 10 1 1 \n2 59 12 1 1 \n3 55 14 1 1 \n4 38 13 1 0 \n.. ... ... ... .. \n395 33 14 1 1 \n396 55 11 0 1 \n397 40 18 1 1 \n398 50 12 1 1 \n399 49 16 1 1 \n\n[400 rows x 11 columns]"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:22.232688Z",
"start_time": "2023-08-01T10:53:22.145494Z"
},
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "XMCqfrhZ9M9x",
"outputId": "15caba84-7e7d-477b-db78-3858e49a37a1",
"trusted": true
},
"cell_type": "code",
"source": "x= data.iloc[:,1:]\nx = pd.get_dummies(x)\nx",
"execution_count": 8,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>CompPrice</th>\n <th>Income</th>\n <th>Advertising</th>\n <th>Population</th>\n <th>Price</th>\n <th>Age</th>\n <th>Education</th>\n <th>Urban</th>\n <th>US</th>\n <th>ShelveLoc_Bad</th>\n <th>ShelveLoc_Good</th>\n <th>ShelveLoc_Medium</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>138</td>\n <td>73</td>\n <td>11</td>\n <td>276</td>\n <td>120</td>\n <td>42</td>\n <td>17</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>111</td>\n <td>48</td>\n <td>16</td>\n <td>260</td>\n <td>83</td>\n <td>65</td>\n <td>10</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>113</td>\n <td>35</td>\n <td>10</td>\n <td>269</td>\n <td>80</td>\n <td>59</td>\n <td>12</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>117</td>\n <td>100</td>\n <td>4</td>\n <td>466</td>\n <td>97</td>\n <td>55</td>\n <td>14</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>4</th>\n <td>141</td>\n <td>64</td>\n <td>3</td>\n <td>340</td>\n <td>128</td>\n <td>38</td>\n <td>13</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>395</th>\n <td>138</td>\n <td>108</td>\n <td>17</td>\n <td>203</td>\n <td>128</td>\n <td>33</td>\n <td>14</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>396</th>\n 
<td>139</td>\n <td>23</td>\n <td>3</td>\n <td>37</td>\n <td>120</td>\n <td>55</td>\n <td>11</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>397</th>\n <td>162</td>\n <td>26</td>\n <td>12</td>\n <td>368</td>\n <td>159</td>\n <td>40</td>\n <td>18</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>398</th>\n <td>100</td>\n <td>79</td>\n <td>7</td>\n <td>284</td>\n <td>95</td>\n <td>50</td>\n <td>12</td>\n <td>1</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>399</th>\n <td>134</td>\n <td>37</td>\n <td>0</td>\n <td>27</td>\n <td>120</td>\n <td>49</td>\n <td>16</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>400 rows × 12 columns</p>\n</div>",
"text/plain": " CompPrice Income Advertising Population Price Age Education Urban \\\n0 138 73 11 276 120 42 17 1 \n1 111 48 16 260 83 65 10 1 \n2 113 35 10 269 80 59 12 1 \n3 117 100 4 466 97 55 14 1 \n4 141 64 3 340 128 38 13 1 \n.. ... ... ... ... ... ... ... ... \n395 138 108 17 203 128 33 14 1 \n396 139 23 3 37 120 55 11 0 \n397 162 26 12 368 159 40 18 1 \n398 100 79 7 284 95 50 12 1 \n399 134 37 0 27 120 49 16 1 \n\n US ShelveLoc_Bad ShelveLoc_Good ShelveLoc_Medium \n0 1 1 0 0 \n1 1 0 1 0 \n2 1 0 0 1 \n3 1 0 0 1 \n4 0 1 0 0 \n.. .. ... ... ... \n395 1 0 1 0 \n396 1 0 0 1 \n397 1 0 0 1 \n398 1 1 0 0 \n399 1 0 1 0 \n\n[400 rows x 12 columns]"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:34.743867Z",
"start_time": "2023-08-01T10:53:34.662688Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1NKCW9Gj9SDV",
"outputId": "011b8f89-eac6-47b2-d9a7-0f0eef7500c7",
"trusted": true
},
"cell_type": "code",
"source": "data['Sales']= label_encoder.fit_transform(data['Sales'])\ndata['Sales'].describe()",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": "count 400.000000\nmean 0.527500\nstd 0.499868\nmin 0.000000\n25% 0.000000\n50% 1.000000\n75% 1.000000\nmax 1.000000\nName: Sales, dtype: float64"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:36.361091Z",
"start_time": "2023-08-01T10:53:36.336600Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NZ9MzB0R9UFB",
"outputId": "3cbb959f-a242-45e4-e556-c944740a8c7f",
"trusted": true
},
"cell_type": "code",
"source": "# Flip the encoded labels so the target is 1 for 'high sales' and 0 for 'low sales'.\ny = 1- data['Sales']\ny\n",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "0 1\n1 1\n2 1\n3 0\n4 0\n ..\n395 1\n396 0\n397 0\n398 0\n399 1\nName: Sales, Length: 400, dtype: int32"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:37.252797Z",
"start_time": "2023-08-01T10:53:37.205262Z"
},
"id": "EM9gqOq49YcE",
"trusted": true
},
"cell_type": "code",
"source": "# Splitting data into training and testing data set\nx_train, x_test,y_train,y_test = train_test_split(x,y, test_size=0.2,random_state=40)",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:38.194214Z",
"start_time": "2023-08-01T10:53:38.178574Z"
},
"id": "D1vDzFRp9dJP",
"trusted": true
},
"cell_type": "code",
"source": "# Random Forest Classification\nfrom pandas import read_csv\nfrom sklearn.model_selection import KFold\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:43.144730Z",
"start_time": "2023-08-01T10:53:39.058294Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8VWw6laJ9ftu",
"outputId": "2b25ceaa-ff98-420f-ea34-d2b56098b918",
"trusted": true
},
"cell_type": "code",
"source": "num_trees = 100\nmax_features = 3\nkfold = KFold(n_splits=10, random_state=7,shuffle=True)\nmodel = RandomForestClassifier(n_estimators=num_trees, max_features=max_features)\nmodel.fit(x_train,y_train)\nresults = cross_val_score(model, x_train, y_train, cv=kfold)\nprint(results.mean())",
"execution_count": 13,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "0.809375\n"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-01T10:53:44.942824Z",
"start_time": "2023-08-01T10:53:44.894061Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LsWCYjqx9kE5",
"outputId": "296e21fe-e456-4061-aa72-cb598899e591",
"trusted": true
},
"cell_type": "code",
"source": "model.feature_importances_",
"execution_count": 14,
"outputs": [
{
"data": {
"text/plain": "array([0.12318268, 0.10534729, 0.10905791, 0.07796257, 0.2276677 ,\n 0.12578517, 0.05377389, 0.01182007, 0.01977356, 0.04035731,\n 0.09018568, 0.01508617])"
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.9.13",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "!excelR/assignments/Gists/RF - Company Data.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment