rfacc = []
adaacc = []
for num in range(4, 40):
    # Extract the names of the most important features
    important_feature_names = [feature[0] for feature in feature_importances[0:num]]
    # Find the columns of the most important features
    important_indices = [feature_list.index(feature) for feature in important_feature_names]
    # Create training and testing sets with only the important features
    important_train_features = X_train_scaled.iloc[:, important_indices]
    important_test_features = X_test_scaled.iloc[:, important_indices]
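    # Sketch (assumption): the empty rfacc/adaacc lists suggest the loop goes on to
    # retrain the Random Forest and AdaBoost models (configured as in the snippets
    # below) on each reduced feature set and record their test accuracies.
    rf_sub = RandomForestClassifier(max_depth=10, random_state=0)
    rf_sub.fit(important_train_features, y_train)
    rfacc.append(accuracy_score(y_test, rf_sub.predict(important_test_features)))
    ada_sub = AdaBoostClassifier(n_estimators=1000)
    ada_sub.fit(important_train_features, y_train)
    adaacc.append(accuracy_score(y_test, ada_sub.predict(important_test_features)))

# Sketch (assumption): compare accuracy against the number of retained features
plt.plot(range(4, 40), rfacc, label='Random Forest')
plt.plot(range(4, 40), adaacc, label='AdaBoost')
plt.xlabel('Number of features')
plt.ylabel('Test accuracy')
plt.legend()
plt.show()
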
feature_list = list(X_train_scaled.columns)
# Get numerical feature importances
importances = list(rfclf.feature_importances_)
# List of tuples with variable and importance
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)]
# Sort the feature importances by most important first
feature_importances = sorted(feature_importances, key=lambda x: x[1], reverse=True)
# Print out the features and their importances
for pair in feature_importances:
    print('Variable: {:20} Importance: {}'.format(*pair))
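The sorted importances above feed the feature-selection loop earlier in this gist; as an optional addition, here is a hedged sketch of how they might be visualized with the matplotlib import used in the other snippets (the bar chart itself is an assumption, not part of the original code):

# Sketch (assumption): plot the sorted importances as a horizontal bar chart
names = [pair[0] for pair in feature_importances]
scores = [pair[1] for pair in feature_importances]
plt.figure(figsize=(8, 10))
plt.barh(names[::-1], scores[::-1])
plt.xlabel('Importance')
plt.title('Random Forest feature importances')
plt.tight_layout()
plt.show()
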
from sklearn.ensemble import GradientBoostingClassifier
gbclf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.5, max_depth=1, random_state=0).fit(X_train_scaled, y_train)
print('Gradient Boosting classifier accuracy=%.3f' % gbclf.score(X_test_scaled, y_test))
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier(n_estimators=1000)
clf.fit(X_train_scaled, y_train)
y_pred_ada = clf.predict(X_test_scaled)
print('AdaBoost accuracy=%.3f' % accuracy_score(y_test, y_pred_ada))
from sklearn.model_selection import GridSearchCV
# Create the parameter grid based on the results of random search
param_grid = {
    'bootstrap': [True],
    'max_depth': [10, 20, 50, 80],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [500, 1000]
}
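The dictionary above only defines the search space; here is a minimal sketch of how it might be passed to GridSearchCV, assuming the base estimator is the Random Forest used elsewhere in these snippets (the cv, n_jobs and verbose settings are assumptions):

# Sketch (assumption): exhaustive search over param_grid with a Random Forest base model
rf_base = RandomForestClassifier(random_state=0)
grid_search = GridSearchCV(estimator=rf_base, param_grid=param_grid,
                           cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train_scaled, y_train)
print(grid_search.best_params_)
print('Tuned Random Forest accuracy=%.3f' % grid_search.score(X_test_scaled, y_test))
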
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score

raw_data_train = pd.read_csv('au_train.csv')
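The later snippets rely on X_train_scaled, X_test_scaled, y_train and y_test, but the preprocessing cell that builds them is not included here; the following is a hedged sketch of one plausible pipeline (the target column name 'y', the get_dummies encoding and the 80/20 split are all assumptions):

# Sketch (assumption): placeholder preprocessing; column names and encoding are hypothetical
from sklearn.model_selection import train_test_split

X = pd.get_dummies(raw_data_train.drop(columns=['y']))   # 'y' is a hypothetical target column
y = LabelEncoder().fit_transform(raw_data_train['y'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
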
# ROC curve and AUC
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
from matplotlib.pyplot import figure
# Generate a no-skill prediction (majority class)
ns_probs = [0 for _ in range(len(y_test))]
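The snippet above only builds the no-skill baseline; here is a hedged sketch of how the ROC curves might then be computed and plotted, assuming the positive-class probability arrays (rf_probs, svm_probs, mlp_probs) produced by the classifier snippets further down:

# Sketch (assumption): rf_probs, svm_probs and mlp_probs come from the
# Random Forest, SVM and MLP snippets below.
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs)
rf_fpr, rf_tpr, _ = roc_curve(y_test, rf_probs)
svm_fpr, svm_tpr, _ = roc_curve(y_test, svm_probs)
mlp_fpr, mlp_tpr, _ = roc_curve(y_test, mlp_probs)
print('No Skill ROC AUC=%.3f' % roc_auc_score(y_test, ns_probs))
print('Random Forest ROC AUC=%.3f' % roc_auc_score(y_test, rf_probs))
print('SVM ROC AUC=%.3f' % roc_auc_score(y_test, svm_probs))
print('MLP ROC AUC=%.3f' % roc_auc_score(y_test, mlp_probs))
figure(figsize=(8, 6))
pyplot.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
pyplot.plot(rf_fpr, rf_tpr, label='Random Forest')
pyplot.plot(svm_fpr, svm_tpr, label='SVM')
pyplot.plot(mlp_fpr, mlp_tpr, label='MLP')
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
pyplot.legend()
pyplot.show()
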
# Multi-layer perceptron
from sklearn.neural_network import MLPClassifier
mlpclf = MLPClassifier(max_iter=100, activation='relu', solver='adam', random_state=1)
mlpclf.fit(X_train_scaled, y_train)
y_pred_mlp = mlpclf.predict(X_test_scaled)
mlp_probs = mlpclf.predict_proba(X_test_scaled)
mlp_probs = mlp_probs[:, 1]
# SVM classifier
from sklearn import svm
svmclf = svm.SVC(kernel='rbf', probability=True)
svmclf.fit(X_train_scaled, y_train)
y_pred_svm = svmclf.predict(X_test_scaled)
svm_probs = svmclf.predict_proba(X_test_scaled)
svm_probs = svm_probs[:, 1]
# Random Forest classifier
from sklearn.ensemble import RandomForestClassifier
rfclf = RandomForestClassifier(max_depth=10, random_state=0)
rfclf.fit(X_train_scaled, y_train)
y_pred_rf = rfclf.predict(X_test_scaled)
rf_probs = rfclf.predict_proba(X_test_scaled)
rf_probs = rf_probs[:, 1]