Skip to content

Instantly share code, notes, and snippets.

@eduardoftdo
Created April 25, 2020 16:58
Show Gist options
  • Save eduardoftdo/e3d2b7ca4a06d8d86b144482d0aed5a1 to your computer and use it in GitHub Desktop.
Save eduardoftdo/e3d2b7ca4a06d8d86b144482d0aed5a1 to your computer and use it in GitHub Desktop.
Apriori HR.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:00.319829Z",
"start_time": "2020-04-25T00:49:58.785333Z"
},
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd\nfrom mlxtend.frequent_patterns import apriori\nfrom mlxtend.frequent_patterns import association_rules",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:00.746955Z",
"start_time": "2020-04-25T00:50:00.742958Z"
},
"trusted": true
},
"cell_type": "code",
"source": "pd.set_option('display.max_columns', None)\npd.set_option('display.max_rows', None)\npd.set_option('display.max_colwidth', 999)",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:03.137179Z",
"start_time": "2020-04-25T00:50:03.119190Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:07.145911Z",
"start_time": "2020-04-25T00:50:07.118943Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df.head()",
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": " Age Attrition BusinessTravel DailyRate Department \\\n0 41 Yes Travel_Rarely 1102 Sales \n1 49 No Travel_Frequently 279 Research & Development \n2 37 Yes Travel_Rarely 1373 Research & Development \n3 33 No Travel_Frequently 1392 Research & Development \n4 27 No Travel_Rarely 591 Research & Development \n\n DistanceFromHome Education EducationField EmployeeCount EmployeeNumber \\\n0 1 2 Life Sciences 1 1 \n1 8 1 Life Sciences 1 2 \n2 2 2 Other 1 4 \n3 3 4 Life Sciences 1 5 \n4 2 1 Medical 1 7 \n\n EnvironmentSatisfaction Gender HourlyRate JobInvolvement JobLevel \\\n0 2 Female 94 3 2 \n1 3 Male 61 2 2 \n2 4 Male 92 2 1 \n3 4 Female 56 3 1 \n4 1 Male 40 3 1 \n\n JobRole JobSatisfaction MaritalStatus MonthlyIncome \\\n0 Sales Executive 4 Single 5993 \n1 Research Scientist 2 Married 5130 \n2 Laboratory Technician 3 Single 2090 \n3 Research Scientist 3 Married 2909 \n4 Laboratory Technician 2 Married 3468 \n\n MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike \\\n0 19479 8 Y Yes 11 \n1 24907 1 Y No 23 \n2 2396 6 Y Yes 15 \n3 23159 1 Y Yes 11 \n4 16632 9 Y No 12 \n\n PerformanceRating RelationshipSatisfaction StandardHours \\\n0 3 1 80 \n1 4 4 80 \n2 3 2 80 \n3 3 3 80 \n4 3 4 80 \n\n StockOptionLevel TotalWorkingYears TrainingTimesLastYear \\\n0 0 8 0 \n1 1 10 3 \n2 0 7 3 \n3 0 8 3 \n4 1 6 3 \n\n WorkLifeBalance YearsAtCompany YearsInCurrentRole \\\n0 1 6 4 \n1 3 10 7 \n2 3 0 0 \n3 3 8 7 \n4 3 2 2 \n\n YearsSinceLastPromotion YearsWithCurrManager \n0 0 5 \n1 1 7 \n2 0 0 \n3 3 0 \n4 2 2 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Age</th>\n <th>Attrition</th>\n <th>BusinessTravel</th>\n <th>DailyRate</th>\n <th>Department</th>\n <th>DistanceFromHome</th>\n <th>Education</th>\n <th>EducationField</th>\n <th>EmployeeCount</th>\n <th>EmployeeNumber</th>\n <th>EnvironmentSatisfaction</th>\n <th>Gender</th>\n <th>HourlyRate</th>\n <th>JobInvolvement</th>\n <th>JobLevel</th>\n <th>JobRole</th>\n <th>JobSatisfaction</th>\n <th>MaritalStatus</th>\n <th>MonthlyIncome</th>\n <th>MonthlyRate</th>\n <th>NumCompaniesWorked</th>\n <th>Over18</th>\n <th>OverTime</th>\n <th>PercentSalaryHike</th>\n <th>PerformanceRating</th>\n <th>RelationshipSatisfaction</th>\n <th>StandardHours</th>\n <th>StockOptionLevel</th>\n <th>TotalWorkingYears</th>\n <th>TrainingTimesLastYear</th>\n <th>WorkLifeBalance</th>\n <th>YearsAtCompany</th>\n <th>YearsInCurrentRole</th>\n <th>YearsSinceLastPromotion</th>\n <th>YearsWithCurrManager</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>41</td>\n <td>Yes</td>\n <td>Travel_Rarely</td>\n <td>1102</td>\n <td>Sales</td>\n <td>1</td>\n <td>2</td>\n <td>Life Sciences</td>\n <td>1</td>\n <td>1</td>\n <td>2</td>\n <td>Female</td>\n <td>94</td>\n <td>3</td>\n <td>2</td>\n <td>Sales Executive</td>\n <td>4</td>\n <td>Single</td>\n <td>5993</td>\n <td>19479</td>\n <td>8</td>\n <td>Y</td>\n <td>Yes</td>\n <td>11</td>\n <td>3</td>\n <td>1</td>\n <td>80</td>\n <td>0</td>\n <td>8</td>\n <td>0</td>\n <td>1</td>\n <td>6</td>\n <td>4</td>\n <td>0</td>\n <td>5</td>\n </tr>\n <tr>\n <td>1</td>\n <td>49</td>\n <td>No</td>\n <td>Travel_Frequently</td>\n <td>279</td>\n <td>Research &amp; Development</td>\n <td>8</td>\n <td>1</td>\n <td>Life Sciences</td>\n <td>1</td>\n <td>2</td>\n <td>3</td>\n <td>Male</td>\n <td>61</td>\n <td>2</td>\n <td>2</td>\n <td>Research Scientist</td>\n <td>2</td>\n <td>Married</td>\n <td>5130</td>\n <td>24907</td>\n <td>1</td>\n <td>Y</td>\n <td>No</td>\n <td>23</td>\n <td>4</td>\n <td>4</td>\n <td>80</td>\n <td>1</td>\n <td>10</td>\n <td>3</td>\n <td>3</td>\n <td>10</td>\n <td>7</td>\n <td>1</td>\n <td>7</td>\n </tr>\n <tr>\n <td>2</td>\n <td>37</td>\n <td>Yes</td>\n <td>Travel_Rarely</td>\n <td>1373</td>\n <td>Research &amp; Development</td>\n <td>2</td>\n <td>2</td>\n <td>Other</td>\n <td>1</td>\n <td>4</td>\n <td>4</td>\n <td>Male</td>\n <td>92</td>\n <td>2</td>\n <td>1</td>\n <td>Laboratory Technician</td>\n <td>3</td>\n <td>Single</td>\n <td>2090</td>\n <td>2396</td>\n <td>6</td>\n <td>Y</td>\n <td>Yes</td>\n <td>15</td>\n <td>3</td>\n <td>2</td>\n <td>80</td>\n <td>0</td>\n <td>7</td>\n <td>3</td>\n <td>3</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <td>3</td>\n <td>33</td>\n <td>No</td>\n <td>Travel_Frequently</td>\n <td>1392</td>\n <td>Research &amp; Development</td>\n <td>3</td>\n <td>4</td>\n <td>Life Sciences</td>\n <td>1</td>\n <td>5</td>\n <td>4</td>\n <td>Female</td>\n <td>56</td>\n <td>3</td>\n <td>1</td>\n <td>Research Scientist</td>\n <td>3</td>\n <td>Married</td>\n <td>2909</td>\n <td>23159</td>\n <td>1</td>\n <td>Y</td>\n <td>Yes</td>\n <td>11</td>\n <td>3</td>\n <td>3</td>\n <td>80</td>\n <td>0</td>\n <td>8</td>\n <td>3</td>\n <td>3</td>\n <td>8</td>\n <td>7</td>\n <td>3</td>\n <td>0</td>\n </tr>\n <tr>\n <td>4</td>\n <td>27</td>\n <td>No</td>\n <td>Travel_Rarely</td>\n <td>591</td>\n <td>Research &amp; Development</td>\n <td>2</td>\n <td>1</td>\n <td>Medical</td>\n <td>1</td>\n <td>7</td>\n <td>1</td>\n <td>Male</td>\n <td>40</td>\n <td>3</td>\n <td>1</td>\n <td>Laboratory Technician</td>\n <td>2</td>\n <td>Married</td>\n <td>3468</td>\n <td>16632</td>\n <td>9</td>\n <td>Y</td>\n <td>No</td>\n <td>12</td>\n <td>3</td>\n <td>4</td>\n <td>80</td>\n <td>1</td>\n <td>6</td>\n <td>3</td>\n <td>3</td>\n <td>2</td>\n <td>2</td>\n <td>2</td>\n <td>2</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:08.903845Z",
"start_time": "2020-04-25T00:50:08.898845Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df.shape",
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "(1470, 35)"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:15.653754Z",
"start_time": "2020-04-25T00:50:15.611778Z"
},
"trusted": true
},
"cell_type": "code",
"source": "pd.qcut(df['Age'], q=4).tail()",
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 6,
"data": {
"text/plain": "1465 (30.0, 36.0]\n1466 (36.0, 43.0]\n1467 (17.999, 30.0]\n1468 (43.0, 60.0]\n1469 (30.0, 36.0]\nName: Age, dtype: category\nCategories (4, interval[float64]): [(17.999, 30.0] < (30.0, 36.0] < (36.0, 43.0] < (43.0, 60.0]]"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:16.691934Z",
"start_time": "2020-04-25T00:50:16.683925Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df['Age_Range'] = pd.qcut(df['Age'], q=4, labels=['<=30', '>30 <=36', '>36 <=43', '>43'])",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:18.252894Z",
"start_time": "2020-04-25T00:50:18.209913Z"
},
"trusted": true
},
"cell_type": "code",
"source": "pd.qcut(df['DistanceFromHome'], q=4).tail()",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "1465 (14.0, 29.0]\n1466 (2.0, 7.0]\n1467 (2.0, 7.0]\n1468 (0.999, 2.0]\n1469 (7.0, 14.0]\nName: DistanceFromHome, dtype: category\nCategories (4, interval[float64]): [(0.999, 2.0] < (2.0, 7.0] < (7.0, 14.0] < (14.0, 29.0]]"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:18.595824Z",
"start_time": "2020-04-25T00:50:18.587816Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df['DistanceFromHome_Range'] = pd.qcut(df['DistanceFromHome'], q=4, labels=['<=2', '>2 <=7', '>7 <=14', '>14'])",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:19.276621Z",
"start_time": "2020-04-25T00:50:19.236625Z"
},
"trusted": true
},
"cell_type": "code",
"source": "pd.qcut(df['HourlyRate'], q=4).tail()",
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 10,
"data": {
"text/plain": "1465 (29.999, 48.0]\n1466 (29.999, 48.0]\n1467 (83.75, 100.0]\n1468 (48.0, 66.0]\n1469 (66.0, 83.75]\nName: HourlyRate, dtype: category\nCategories (4, interval[float64]): [(29.999, 48.0] < (48.0, 66.0] < (66.0, 83.75] < (83.75, 100.0]]"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:19.587426Z",
"start_time": "2020-04-25T00:50:19.578431Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df['HourlyRate_Range'] = pd.qcut(df['DistanceFromHome'], q=4, labels=['<=48', '>48 >=66', '>66 <=83.75', '>83.75'])",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:50:27.206230Z",
"start_time": "2020-04-25T00:50:27.203249Z"
},
"trusted": true
},
"cell_type": "code",
"source": "columns = ['Attrition',\n 'Age_Range',\n 'BusinessTravel',\n 'Department',\n 'DistanceFromHome_Range',\n 'Education',\n 'EducationField',\n 'EnvironmentSatisfaction',\n 'Gender',\n 'HourlyRate_Range',\n 'JobInvolvement',\n 'JobLevel',\n 'JobRole',\n 'JobSatisfaction',\n 'MaritalStatus']\n\nnot_used_columns = list(set(df.columns.to_list()) - set(columns))",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df = pd.get_dummies(df, columns=columns)",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df.drop(labels=not_used_columns, axis=1, inplace=True)",
"execution_count": 14,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df.head()",
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 15,
"data": {
"text/plain": " Attrition_No Attrition_Yes Age_Range_<=30 Age_Range_>30 <=36 \\\n0 0 1 0 0 \n1 1 0 0 0 \n2 0 1 0 0 \n3 1 0 0 1 \n4 1 0 1 0 \n\n Age_Range_>36 <=43 Age_Range_>43 BusinessTravel_Non-Travel \\\n0 1 0 0 \n1 0 1 0 \n2 1 0 0 \n3 0 0 0 \n4 0 0 0 \n\n BusinessTravel_Travel_Frequently BusinessTravel_Travel_Rarely \\\n0 0 1 \n1 1 0 \n2 0 1 \n3 1 0 \n4 0 1 \n\n Department_Human Resources Department_Research & Development \\\n0 0 0 \n1 0 1 \n2 0 1 \n3 0 1 \n4 0 1 \n\n Department_Sales DistanceFromHome_Range_<=2 \\\n0 1 1 \n1 0 0 \n2 0 1 \n3 0 0 \n4 0 1 \n\n DistanceFromHome_Range_>2 <=7 DistanceFromHome_Range_>7 <=14 \\\n0 0 0 \n1 0 1 \n2 0 0 \n3 1 0 \n4 0 0 \n\n DistanceFromHome_Range_>14 Education_1 Education_2 Education_3 \\\n0 0 0 1 0 \n1 0 1 0 0 \n2 0 0 1 0 \n3 0 0 0 0 \n4 0 1 0 0 \n\n Education_4 Education_5 EducationField_Human Resources \\\n0 0 0 0 \n1 0 0 0 \n2 0 0 0 \n3 1 0 0 \n4 0 0 0 \n\n EducationField_Life Sciences EducationField_Marketing \\\n0 1 0 \n1 1 0 \n2 0 0 \n3 1 0 \n4 0 0 \n\n EducationField_Medical EducationField_Other \\\n0 0 0 \n1 0 0 \n2 0 1 \n3 0 0 \n4 1 0 \n\n EducationField_Technical Degree EnvironmentSatisfaction_1 \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 0 \n4 0 1 \n\n EnvironmentSatisfaction_2 EnvironmentSatisfaction_3 \\\n0 1 0 \n1 0 1 \n2 0 0 \n3 0 0 \n4 0 0 \n\n EnvironmentSatisfaction_4 Gender_Female Gender_Male \\\n0 0 1 0 \n1 0 0 1 \n2 1 0 1 \n3 1 1 0 \n4 0 0 1 \n\n HourlyRate_Range_<=48 HourlyRate_Range_>48 >=66 \\\n0 1 0 \n1 0 0 \n2 1 0 \n3 0 1 \n4 1 0 \n\n HourlyRate_Range_>66 <=83.75 HourlyRate_Range_>83.75 JobInvolvement_1 \\\n0 0 0 0 \n1 1 0 0 \n2 0 0 0 \n3 0 0 0 \n4 0 0 0 \n\n JobInvolvement_2 JobInvolvement_3 JobInvolvement_4 JobLevel_1 \\\n0 0 1 0 0 \n1 1 0 0 0 \n2 1 0 0 1 \n3 0 1 0 1 \n4 0 1 0 1 \n\n JobLevel_2 JobLevel_3 JobLevel_4 JobLevel_5 \\\n0 1 0 0 0 \n1 1 0 0 0 \n2 0 0 0 0 \n3 0 0 0 0 \n4 0 0 0 0 \n\n JobRole_Healthcare Representative JobRole_Human Resources \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 0 \n4 0 0 \n\n JobRole_Laboratory Technician JobRole_Manager \\\n0 0 0 \n1 0 0 \n2 1 0 \n3 0 0 \n4 1 0 \n\n JobRole_Manufacturing Director JobRole_Research Director \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 0 \n4 0 0 \n\n JobRole_Research Scientist JobRole_Sales Executive \\\n0 0 1 \n1 1 0 \n2 0 0 \n3 1 0 \n4 0 0 \n\n JobRole_Sales Representative JobSatisfaction_1 JobSatisfaction_2 \\\n0 0 0 0 \n1 0 0 1 \n2 0 0 0 \n3 0 0 0 \n4 0 0 1 \n\n JobSatisfaction_3 JobSatisfaction_4 MaritalStatus_Divorced \\\n0 0 1 0 \n1 0 0 0 \n2 1 0 0 \n3 1 0 0 \n4 0 0 0 \n\n MaritalStatus_Married MaritalStatus_Single \n0 0 1 \n1 1 0 \n2 0 1 \n3 1 0 \n4 1 0 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Attrition_No</th>\n <th>Attrition_Yes</th>\n <th>Age_Range_&lt;=30</th>\n <th>Age_Range_&gt;30 &lt;=36</th>\n <th>Age_Range_&gt;36 &lt;=43</th>\n <th>Age_Range_&gt;43</th>\n <th>BusinessTravel_Non-Travel</th>\n <th>BusinessTravel_Travel_Frequently</th>\n <th>BusinessTravel_Travel_Rarely</th>\n <th>Department_Human Resources</th>\n <th>Department_Research &amp; Development</th>\n <th>Department_Sales</th>\n <th>DistanceFromHome_Range_&lt;=2</th>\n <th>DistanceFromHome_Range_&gt;2 &lt;=7</th>\n <th>DistanceFromHome_Range_&gt;7 &lt;=14</th>\n <th>DistanceFromHome_Range_&gt;14</th>\n <th>Education_1</th>\n <th>Education_2</th>\n <th>Education_3</th>\n <th>Education_4</th>\n <th>Education_5</th>\n <th>EducationField_Human Resources</th>\n <th>EducationField_Life Sciences</th>\n <th>EducationField_Marketing</th>\n <th>EducationField_Medical</th>\n <th>EducationField_Other</th>\n <th>EducationField_Technical Degree</th>\n <th>EnvironmentSatisfaction_1</th>\n <th>EnvironmentSatisfaction_2</th>\n <th>EnvironmentSatisfaction_3</th>\n <th>EnvironmentSatisfaction_4</th>\n <th>Gender_Female</th>\n <th>Gender_Male</th>\n <th>HourlyRate_Range_&lt;=48</th>\n <th>HourlyRate_Range_&gt;48 &gt;=66</th>\n <th>HourlyRate_Range_&gt;66 &lt;=83.75</th>\n <th>HourlyRate_Range_&gt;83.75</th>\n <th>JobInvolvement_1</th>\n <th>JobInvolvement_2</th>\n <th>JobInvolvement_3</th>\n <th>JobInvolvement_4</th>\n <th>JobLevel_1</th>\n <th>JobLevel_2</th>\n <th>JobLevel_3</th>\n <th>JobLevel_4</th>\n <th>JobLevel_5</th>\n <th>JobRole_Healthcare Representative</th>\n <th>JobRole_Human Resources</th>\n <th>JobRole_Laboratory Technician</th>\n <th>JobRole_Manager</th>\n <th>JobRole_Manufacturing Director</th>\n <th>JobRole_Research Director</th>\n <th>JobRole_Research Scientist</th>\n <th>JobRole_Sales Executive</th>\n <th>JobRole_Sales Representative</th>\n <th>JobSatisfaction_1</th>\n <th>JobSatisfaction_2</th>\n <th>JobSatisfaction_3</th>\n <th>JobSatisfaction_4</th>\n <th>MaritalStatus_Divorced</th>\n <th>MaritalStatus_Married</th>\n <th>MaritalStatus_Single</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <td>3</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <td>4</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T01:14:24.203223Z",
"start_time": "2020-04-25T01:14:22.451663Z"
},
"trusted": true
},
"cell_type": "code",
"source": "#Apriori min support\nmin_support = 0.05\n\n#Max lenght of apriori n-grams\nmax_len = 3\n\nfrequent_items = apriori(df, use_colnames=True, min_support=min_support, max_len=max_len + 1)\nrules = association_rules(frequent_items, metric='lift', min_threshold=1)\n\nrules.head(10).sort_values(by='confidence', ascending=False)",
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 16,
"data": {
"text/plain": " antecedents consequents \\\n7 (BusinessTravel_Non-Travel) (Attrition_No) \n2 (Age_Range_>36 <=43) (Attrition_No) \n4 (Age_Range_>43) (Attrition_No) \n8 (BusinessTravel_Travel_Rarely) (Attrition_No) \n1 (Age_Range_>30 <=36) (Attrition_No) \n9 (Attrition_No) (BusinessTravel_Travel_Rarely) \n0 (Attrition_No) (Age_Range_>30 <=36) \n5 (Attrition_No) (Age_Range_>43) \n3 (Attrition_No) (Age_Range_>36 <=43) \n6 (Attrition_No) (BusinessTravel_Non-Travel) \n\n antecedent support consequent support support confidence lift \\\n7 0.102041 0.838776 0.093878 0.920000 1.096837 \n2 0.221088 0.838776 0.201361 0.910769 1.085832 \n4 0.236054 0.838776 0.207483 0.878963 1.047912 \n8 0.709524 0.838776 0.603401 0.850431 1.013896 \n1 0.280272 0.838776 0.235374 0.839806 1.001228 \n9 0.838776 0.709524 0.603401 0.719384 1.013896 \n0 0.838776 0.280272 0.235374 0.280616 1.001228 \n5 0.838776 0.236054 0.207483 0.247364 1.047912 \n3 0.838776 0.221088 0.201361 0.240065 1.085832 \n6 0.838776 0.102041 0.093878 0.111922 1.096837 \n\n leverage conviction \n7 0.008288 2.015306 \n2 0.015917 1.806826 \n4 0.009486 1.332021 \n8 0.008270 1.077930 \n1 0.000289 1.006432 \n9 0.008270 1.035136 \n0 0.000289 1.000479 \n5 0.009486 1.015027 \n3 0.015917 1.024971 \n6 0.008288 1.011127 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>antecedents</th>\n <th>consequents</th>\n <th>antecedent support</th>\n <th>consequent support</th>\n <th>support</th>\n <th>confidence</th>\n <th>lift</th>\n <th>leverage</th>\n <th>conviction</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>7</td>\n <td>(BusinessTravel_Non-Travel)</td>\n <td>(Attrition_No)</td>\n <td>0.102041</td>\n <td>0.838776</td>\n <td>0.093878</td>\n <td>0.920000</td>\n <td>1.096837</td>\n <td>0.008288</td>\n <td>2.015306</td>\n </tr>\n <tr>\n <td>2</td>\n <td>(Age_Range_&gt;36 &lt;=43)</td>\n <td>(Attrition_No)</td>\n <td>0.221088</td>\n <td>0.838776</td>\n <td>0.201361</td>\n <td>0.910769</td>\n <td>1.085832</td>\n <td>0.015917</td>\n <td>1.806826</td>\n </tr>\n <tr>\n <td>4</td>\n <td>(Age_Range_&gt;43)</td>\n <td>(Attrition_No)</td>\n <td>0.236054</td>\n <td>0.838776</td>\n <td>0.207483</td>\n <td>0.878963</td>\n <td>1.047912</td>\n <td>0.009486</td>\n <td>1.332021</td>\n </tr>\n <tr>\n <td>8</td>\n <td>(BusinessTravel_Travel_Rarely)</td>\n <td>(Attrition_No)</td>\n <td>0.709524</td>\n <td>0.838776</td>\n <td>0.603401</td>\n <td>0.850431</td>\n <td>1.013896</td>\n <td>0.008270</td>\n <td>1.077930</td>\n </tr>\n <tr>\n <td>1</td>\n <td>(Age_Range_&gt;30 &lt;=36)</td>\n <td>(Attrition_No)</td>\n <td>0.280272</td>\n <td>0.838776</td>\n <td>0.235374</td>\n <td>0.839806</td>\n <td>1.001228</td>\n <td>0.000289</td>\n <td>1.006432</td>\n </tr>\n <tr>\n <td>9</td>\n <td>(Attrition_No)</td>\n <td>(BusinessTravel_Travel_Rarely)</td>\n <td>0.838776</td>\n <td>0.709524</td>\n <td>0.603401</td>\n <td>0.719384</td>\n <td>1.013896</td>\n <td>0.008270</td>\n <td>1.035136</td>\n </tr>\n <tr>\n <td>0</td>\n <td>(Attrition_No)</td>\n <td>(Age_Range_&gt;30 &lt;=36)</td>\n <td>0.838776</td>\n <td>0.280272</td>\n <td>0.235374</td>\n <td>0.280616</td>\n <td>1.001228</td>\n <td>0.000289</td>\n <td>1.000479</td>\n </tr>\n <tr>\n <td>5</td>\n <td>(Attrition_No)</td>\n <td>(Age_Range_&gt;43)</td>\n <td>0.838776</td>\n <td>0.236054</td>\n <td>0.207483</td>\n <td>0.247364</td>\n <td>1.047912</td>\n <td>0.009486</td>\n <td>1.015027</td>\n </tr>\n <tr>\n <td>3</td>\n <td>(Attrition_No)</td>\n <td>(Age_Range_&gt;36 &lt;=43)</td>\n <td>0.838776</td>\n <td>0.221088</td>\n <td>0.201361</td>\n <td>0.240065</td>\n <td>1.085832</td>\n <td>0.015917</td>\n <td>1.024971</td>\n </tr>\n <tr>\n <td>6</td>\n <td>(Attrition_No)</td>\n <td>(BusinessTravel_Non-Travel)</td>\n <td>0.838776</td>\n <td>0.102041</td>\n <td>0.093878</td>\n <td>0.111922</td>\n <td>1.096837</td>\n <td>0.008288</td>\n <td>1.011127</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:55:13.695617Z",
"start_time": "2020-04-25T00:55:13.688637Z"
},
"trusted": true
},
"cell_type": "code",
"source": "df['Attrition_No'].value_counts()",
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 17,
"data": {
"text/plain": "1 1233\n0 237\nName: Attrition_No, dtype: int64"
},
"metadata": {}
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:54:49.953008Z",
"start_time": "2020-04-25T00:54:49.917020Z"
},
"trusted": true
},
"cell_type": "code",
"source": "'''\nFiltering only consequents with Attrition NO\n'''\n\n#Apriori min support\nmin_support = 0.1\n\n#Max lenght of apriori n-grams\nmax_len = 3\n\nfrequent_items = apriori(df, use_colnames=True, min_support=min_support, max_len=max_len + 1)\nrules = association_rules(frequent_items, metric='lift', min_threshold=1)\n\ntarget = '{\\'Attrition_No\\'}'\n\nresults_attrition_no = rules[rules['consequents'].astype(str).str.contains(target, na=False)].sort_values(by='confidence', ascending=False)\n\nresults_attrition_no.head(10)",
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 18,
"data": {
"text/plain": " antecedents \\\n1091 (JobLevel_2, JobSatisfaction_4) \n636 (JobLevel_2, Department_Research & Development) \n2720 (JobLevel_2, JobInvolvement_3, Department_Research & Development) \n2082 (BusinessTravel_Travel_Rarely, JobLevel_2, Department_Research & Development) \n2658 (JobLevel_2, Gender_Male, Department_Research & Development) \n848 (JobLevel_2, EducationField_Life Sciences) \n383 (Age_Range_>36 <=43, Department_Research & Development) \n723 (DistanceFromHome_Range_<=2, MaritalStatus_Married) \n2865 (DistanceFromHome_Range_<=2, HourlyRate_Range_<=48, MaritalStatus_Married) \n1017 (HourlyRate_Range_<=48, MaritalStatus_Married) \n\n consequents antecedent support consequent support support \\\n1091 (Attrition_No) 0.117007 0.838776 0.110884 \n636 (Attrition_No) 0.191156 0.838776 0.180952 \n2720 (Attrition_No) 0.111565 0.838776 0.105442 \n2082 (Attrition_No) 0.127211 0.838776 0.119728 \n2658 (Attrition_No) 0.112245 0.838776 0.104762 \n848 (Attrition_No) 0.146939 0.838776 0.136054 \n383 (Attrition_No) 0.150340 0.838776 0.138776 \n723 (Attrition_No) 0.129252 0.838776 0.119048 \n2865 (Attrition_No) 0.129252 0.838776 0.119048 \n1017 (Attrition_No) 0.129252 0.838776 0.119048 \n\n confidence lift leverage conviction \n1091 0.947674 1.129831 0.012742 3.081179 \n636 0.946619 1.128573 0.020615 3.020272 \n2720 0.945122 1.126788 0.011865 2.937868 \n2082 0.941176 1.122084 0.013027 2.740816 \n2658 0.933333 1.112733 0.010614 2.418367 \n848 0.925926 1.103902 0.012806 2.176531 \n383 0.923077 1.100505 0.012674 2.095918 \n723 0.921053 1.098092 0.010634 2.042177 \n2865 0.921053 1.098092 0.010634 2.042177 \n1017 0.921053 1.098092 0.010634 2.042177 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>antecedents</th>\n <th>consequents</th>\n <th>antecedent support</th>\n <th>consequent support</th>\n <th>support</th>\n <th>confidence</th>\n <th>lift</th>\n <th>leverage</th>\n <th>conviction</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>1091</td>\n <td>(JobLevel_2, JobSatisfaction_4)</td>\n <td>(Attrition_No)</td>\n <td>0.117007</td>\n <td>0.838776</td>\n <td>0.110884</td>\n <td>0.947674</td>\n <td>1.129831</td>\n <td>0.012742</td>\n <td>3.081179</td>\n </tr>\n <tr>\n <td>636</td>\n <td>(JobLevel_2, Department_Research &amp; Development)</td>\n <td>(Attrition_No)</td>\n <td>0.191156</td>\n <td>0.838776</td>\n <td>0.180952</td>\n <td>0.946619</td>\n <td>1.128573</td>\n <td>0.020615</td>\n <td>3.020272</td>\n </tr>\n <tr>\n <td>2720</td>\n <td>(JobLevel_2, JobInvolvement_3, Department_Research &amp; Development)</td>\n <td>(Attrition_No)</td>\n <td>0.111565</td>\n <td>0.838776</td>\n <td>0.105442</td>\n <td>0.945122</td>\n <td>1.126788</td>\n <td>0.011865</td>\n <td>2.937868</td>\n </tr>\n <tr>\n <td>2082</td>\n <td>(BusinessTravel_Travel_Rarely, JobLevel_2, Department_Research &amp; Development)</td>\n <td>(Attrition_No)</td>\n <td>0.127211</td>\n <td>0.838776</td>\n <td>0.119728</td>\n <td>0.941176</td>\n <td>1.122084</td>\n <td>0.013027</td>\n <td>2.740816</td>\n </tr>\n <tr>\n <td>2658</td>\n <td>(JobLevel_2, Gender_Male, Department_Research &amp; Development)</td>\n <td>(Attrition_No)</td>\n <td>0.112245</td>\n <td>0.838776</td>\n <td>0.104762</td>\n <td>0.933333</td>\n <td>1.112733</td>\n <td>0.010614</td>\n <td>2.418367</td>\n </tr>\n <tr>\n <td>848</td>\n <td>(JobLevel_2, EducationField_Life Sciences)</td>\n <td>(Attrition_No)</td>\n <td>0.146939</td>\n <td>0.838776</td>\n <td>0.136054</td>\n <td>0.925926</td>\n <td>1.103902</td>\n <td>0.012806</td>\n <td>2.176531</td>\n </tr>\n <tr>\n <td>383</td>\n <td>(Age_Range_&gt;36 &lt;=43, Department_Research &amp; Development)</td>\n <td>(Attrition_No)</td>\n <td>0.150340</td>\n <td>0.838776</td>\n <td>0.138776</td>\n <td>0.923077</td>\n <td>1.100505</td>\n <td>0.012674</td>\n <td>2.095918</td>\n </tr>\n <tr>\n <td>723</td>\n <td>(DistanceFromHome_Range_&lt;=2, MaritalStatus_Married)</td>\n <td>(Attrition_No)</td>\n <td>0.129252</td>\n <td>0.838776</td>\n <td>0.119048</td>\n <td>0.921053</td>\n <td>1.098092</td>\n <td>0.010634</td>\n <td>2.042177</td>\n </tr>\n <tr>\n <td>2865</td>\n <td>(DistanceFromHome_Range_&lt;=2, HourlyRate_Range_&lt;=48, MaritalStatus_Married)</td>\n <td>(Attrition_No)</td>\n <td>0.129252</td>\n <td>0.838776</td>\n <td>0.119048</td>\n <td>0.921053</td>\n <td>1.098092</td>\n <td>0.010634</td>\n <td>2.042177</td>\n </tr>\n <tr>\n <td>1017</td>\n <td>(HourlyRate_Range_&lt;=48, MaritalStatus_Married)</td>\n <td>(Attrition_No)</td>\n <td>0.129252</td>\n <td>0.838776</td>\n <td>0.119048</td>\n <td>0.921053</td>\n <td>1.098092</td>\n <td>0.010634</td>\n <td>2.042177</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "results_attrition_no['antecedents'] = results_attrition_no['antecedents'].apply(lambda x: ','.join(list(x))).astype('unicode')\nresults_attrition_no['antecedents'] = results_attrition_no['antecedents'].str.title().str.replace('_', ' ')\nresults_attrition_no['consequents'] = results_attrition_no['consequents'].apply(lambda x: ','.join(list(x))).astype('unicode')\nresults_attrition_no['consequents'] = results_attrition_no['consequents'].str.title().str.replace('_', ' ')",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "results_attrition_no.to_csv('results_attrition_no.csv')",
"execution_count": 20,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-25T00:54:47.604300Z",
"start_time": "2020-04-25T00:54:47.581313Z"
},
"trusted": true
},
"cell_type": "code",
"source": "'''\nFiltering only consequents with Attrition YES\n'''\n\n#Apriori min support\nmin_support = 0.02\n\n#Max lenght of apriori n-grams\nmax_len = 3\n\nfrequent_items = apriori(df, use_colnames=True, min_support=min_support, max_len=max_len + 1)\nrules = association_rules(frequent_items, metric='lift', min_threshold=1)\n\ntarget = '{\\'Attrition_Yes\\'}'\n\nresults_attrition_yes = rules[rules['consequents'].astype(str).str.contains(target, na=False)].sort_values(by='confidence', ascending=False)\n\nresults_attrition_yes.head(10)",
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 21,
"data": {
"text/plain": " antecedents \\\n67292 (Age_Range_<=30, JobLevel_1, MaritalStatus_Single) \n67252 (Age_Range_<=30, JobLevel_1, JobInvolvement_2) \n4794 (Age_Range_<=30, BusinessTravel_Travel_Frequently) \n4914 (JobLevel_1, BusinessTravel_Travel_Frequently) \n68072 (JobRole_Sales Representative, JobLevel_1, Department_Sales) \n5070 (JobLevel_1, Department_Sales) \n5470 (JobRole_Sales Representative, JobLevel_1) \n68316 (Education_3, JobLevel_1, MaritalStatus_Single) \n5082 (JobRole_Sales Representative, Department_Sales) \n102 (JobRole_Sales Representative) \n\n consequents antecedent support consequent support support \\\n67292 (Attrition_Yes) 0.072789 0.161224 0.033333 \n67252 (Attrition_Yes) 0.047619 0.161224 0.021088 \n4794 (Attrition_Yes) 0.046939 0.161224 0.020408 \n4914 (Attrition_Yes) 0.070748 0.161224 0.030612 \n68072 (Attrition_Yes) 0.051701 0.161224 0.021769 \n5070 (Attrition_Yes) 0.051701 0.161224 0.021769 \n5470 (Attrition_Yes) 0.051701 0.161224 0.021769 \n68316 (Attrition_Yes) 0.061224 0.161224 0.025170 \n5082 (Attrition_Yes) 0.056463 0.161224 0.022449 \n102 (Attrition_Yes) 0.056463 0.161224 0.022449 \n\n confidence lift leverage conviction \n67292 0.457944 2.840412 0.021598 1.547396 \n67252 0.442857 2.746835 0.013411 1.505495 \n4794 0.434783 2.696753 0.012840 1.483987 \n4914 0.432692 2.683788 0.019206 1.478520 \n68072 0.421053 2.611592 0.013433 1.448794 \n5070 0.421053 2.611592 0.013433 1.448794 \n5470 0.421053 2.611592 0.013433 1.448794 \n68316 0.411111 2.549930 0.015299 1.424336 \n5082 0.397590 2.466067 0.013346 1.392367 \n102 0.397590 2.466067 0.013346 1.392367 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>antecedents</th>\n <th>consequents</th>\n <th>antecedent support</th>\n <th>consequent support</th>\n <th>support</th>\n <th>confidence</th>\n <th>lift</th>\n <th>leverage</th>\n <th>conviction</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>67292</td>\n <td>(Age_Range_&lt;=30, JobLevel_1, MaritalStatus_Single)</td>\n <td>(Attrition_Yes)</td>\n <td>0.072789</td>\n <td>0.161224</td>\n <td>0.033333</td>\n <td>0.457944</td>\n <td>2.840412</td>\n <td>0.021598</td>\n <td>1.547396</td>\n </tr>\n <tr>\n <td>67252</td>\n <td>(Age_Range_&lt;=30, JobLevel_1, JobInvolvement_2)</td>\n <td>(Attrition_Yes)</td>\n <td>0.047619</td>\n <td>0.161224</td>\n <td>0.021088</td>\n <td>0.442857</td>\n <td>2.746835</td>\n <td>0.013411</td>\n <td>1.505495</td>\n </tr>\n <tr>\n <td>4794</td>\n <td>(Age_Range_&lt;=30, BusinessTravel_Travel_Frequently)</td>\n <td>(Attrition_Yes)</td>\n <td>0.046939</td>\n <td>0.161224</td>\n <td>0.020408</td>\n <td>0.434783</td>\n <td>2.696753</td>\n <td>0.012840</td>\n <td>1.483987</td>\n </tr>\n <tr>\n <td>4914</td>\n <td>(JobLevel_1, BusinessTravel_Travel_Frequently)</td>\n <td>(Attrition_Yes)</td>\n <td>0.070748</td>\n <td>0.161224</td>\n <td>0.030612</td>\n <td>0.432692</td>\n <td>2.683788</td>\n <td>0.019206</td>\n <td>1.478520</td>\n </tr>\n <tr>\n <td>68072</td>\n <td>(JobRole_Sales Representative, JobLevel_1, Department_Sales)</td>\n <td>(Attrition_Yes)</td>\n <td>0.051701</td>\n <td>0.161224</td>\n <td>0.021769</td>\n <td>0.421053</td>\n <td>2.611592</td>\n <td>0.013433</td>\n <td>1.448794</td>\n </tr>\n <tr>\n <td>5070</td>\n <td>(JobLevel_1, Department_Sales)</td>\n <td>(Attrition_Yes)</td>\n <td>0.051701</td>\n <td>0.161224</td>\n <td>0.021769</td>\n <td>0.421053</td>\n <td>2.611592</td>\n <td>0.013433</td>\n <td>1.448794</td>\n </tr>\n <tr>\n <td>5470</td>\n <td>(JobRole_Sales Representative, JobLevel_1)</td>\n <td>(Attrition_Yes)</td>\n <td>0.051701</td>\n <td>0.161224</td>\n <td>0.021769</td>\n <td>0.421053</td>\n <td>2.611592</td>\n <td>0.013433</td>\n <td>1.448794</td>\n </tr>\n <tr>\n <td>68316</td>\n <td>(Education_3, JobLevel_1, MaritalStatus_Single)</td>\n <td>(Attrition_Yes)</td>\n <td>0.061224</td>\n <td>0.161224</td>\n <td>0.025170</td>\n <td>0.411111</td>\n <td>2.549930</td>\n <td>0.015299</td>\n <td>1.424336</td>\n </tr>\n <tr>\n <td>5082</td>\n <td>(JobRole_Sales Representative, Department_Sales)</td>\n <td>(Attrition_Yes)</td>\n <td>0.056463</td>\n <td>0.161224</td>\n <td>0.022449</td>\n <td>0.397590</td>\n <td>2.466067</td>\n <td>0.013346</td>\n <td>1.392367</td>\n </tr>\n <tr>\n <td>102</td>\n <td>(JobRole_Sales Representative)</td>\n <td>(Attrition_Yes)</td>\n <td>0.056463</td>\n <td>0.161224</td>\n <td>0.022449</td>\n <td>0.397590</td>\n <td>2.466067</td>\n <td>0.013346</td>\n <td>1.392367</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "results_attrition_yes['antecedents'] = results_attrition_yes['antecedents'].apply(lambda x: ','.join(list(x))).astype('unicode')\nresults_attrition_yes['antecedents'] = results_attrition_yes['antecedents'].str.title().str.replace('_', ' ')\nresults_attrition_yes['consequents'] = results_attrition_yes['consequents'].apply(lambda x: ','.join(list(x))).astype('unicode')\nresults_attrition_yes['consequents'] = results_attrition_yes['consequents'].str.title().str.replace('_', ' ')",
"execution_count": 22,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "results_attrition_yes.to_csv('results_attrition_yes.csv')",
"execution_count": 23,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"gist": {
"id": "a4b37e803d5710d0456fdf3860401226",
"data": {
"description": "Apriori HR.ipynb",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/a4b37e803d5710d0456fdf3860401226"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment