Created
April 6, 2022 17:26
-
-
Save chetanambi/2ee4b18372e3799dfa8e9279468aeb89 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "5fdb4026", | |
"metadata": {}, | |
"source": [ | |
"# Intel(R) Extension for Scikit-learn" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "64f66056", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"# Intel(R) Extension for Scikit-learn dynamically patches scikit-learn estimators to use oneDAL as the underlying solver.\n", | |
"# patch_sklearn() has to run before the sklearn imports below so that the patched estimators are the ones imported.\n", | |
"from sklearnex import patch_sklearn\n", | |
"patch_sklearn()\n", | |
"\n", | |
"# Import the train/test split helper, the KNN and random forest classifiers, and the accuracy metric\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn.neighbors import KNeighborsClassifier\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.metrics import accuracy_score" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "8d9ec649", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Dataset: https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "201f71e1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_csv('heart_2020_cleaned.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "677a7989", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(319795, 18)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "e172d821", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"No 292422\n", | |
"Yes 27373\n", | |
"Name: HeartDisease, dtype: int64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['HeartDisease'].value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "b017a4fe", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df['HeartDisease'] = df['HeartDisease'].map({'Yes': 1, 'No': 0})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "727f6777", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cat_cols = df.select_dtypes(include='object').columns.tolist()\n", | |
"\n", | |
"df = pd.get_dummies(df, columns=cat_cols) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "11567964", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X, y = df.drop('HeartDisease', axis=1), df['HeartDisease']\n", | |
"\n", | |
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "44a2a022", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"RandomForest Classifier Accuracy: 0.9088669026504397\n" | |
] | |
} | |
], | |
"source": [ | |
"reg = RandomForestClassifier(random_state=42)\n", | |
"reg.fit(X_train, y_train)\n", | |
"preds = reg.predict(X_test)\n", | |
"\n", | |
"print(\"RandomForest Classifier Accuracy:\",accuracy_score(y_test, preds))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "a2de0922", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"KNN Classifier Accuracy: 0.9064903876221091\n" | |
] | |
} | |
], | |
"source": [ | |
"reg = KNeighborsClassifier()\n", | |
"reg.fit(X_train, y_train)\n", | |
"preds = reg.predict(X_test)\n", | |
"\n", | |
"print(\"KNN Classifier Accuracy:\",accuracy_score(y_test, preds))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment