Skip to content

Instantly share code, notes, and snippets.

# Accuracy accumulators for the random-forest and AdaBoost models.
# NOTE(review): nothing in the visible loop body appends to these lists --
# presumably the model-fitting part of the loop is not shown here; confirm.
rfacc = []
adaacc = []
# Sweep over how many of the top-ranked features to keep (4 through 39).
# Assumes `feature_importances` is a list of (name, importance) pairs sorted
# most-important-first and `feature_list` holds the column names of
# X_train_scaled; both are defined outside this block.
for num in range(4, 40):
    # Extract the names of the most important features
    important_feature_names = [feature[0] for feature in feature_importances[:num]]
    # Find the columns of the most important features
    important_indices = [feature_list.index(feature) for feature in important_feature_names]
    # Create training and testing sets with only the important features
    important_train_features = X_train_scaled.iloc[:, important_indices]
    important_test_features = X_test_scaled.iloc[:, important_indices]
# Column names of the training frame; paired positionally with the forest's
# importance vector below (assumes rfclf was fit on X_train_scaled).
feature_list = list(X_train_scaled.columns)
# Get numerical feature importances
importances = list(rfclf.feature_importances_)
# List of tuples with variable and importance (rounded to 2 decimals)
feature_importances = [
    (feature, round(importance, 2))
    for feature, importance in zip(feature_list, importances)
]
# Sort the feature importances by most important first. The sort key is the
# rounded value, and sorted() is stable, so features that tie after rounding
# keep their original column order.
feature_importances = sorted(feature_importances, key=lambda pair: pair[1], reverse=True)
# Print out the feature and importances. A plain loop is used because the
# print is a side effect; a list comprehension would build (and, in a
# notebook, echo) a useless list of None values.
for pair in feature_importances:
    print('Variable: {:20} Importance: {}'.format(*pair))
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting over depth-1 trees: 100 stages, learning rate 0.5,
# seeded for repeatable results.
gbclf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.5,
    max_depth=1,
    random_state=0,
)
gbclf.fit(X_train_scaled, y_train)
# score() reports mean accuracy on the given test features and labels.
print('Gradient Boosting classifier accuracy=%.3f' % gbclf.score(X_test_scaled, y_test))
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with up to 1000 estimators. fit() returns the estimator itself,
# so construction and training are chained.
clf = AdaBoostClassifier(n_estimators=1000).fit(X_train_scaled, y_train)
# Predict labels for the test features and compare them with the true labels.
y_pred_ada = clf.predict(X_test_scaled)
print('AdaBoost accuracy=%.3f' % accuracy_score(y_test, y_pred_ada))
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameter values for an exhaustive grid search; per the
# original note, the ranges are based on the results of a random search.
# The grid spans 1 * 4 * 2 * 3 * 3 * 2 = 144 combinations.
# NOTE(review): the keys look like random-forest parameters, and the
# GridSearchCV call itself is not part of this excerpt -- confirm the estimator.
param_grid = {
    'bootstrap': [True],
    'max_depth': [10, 20, 50, 80],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [500, 1000],
}
# Data handling, plotting, preprocessing and metric imports for the analysis.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import seaborn as sns
import numpy as np
# Load the training data; the relative path is resolved against the current
# working directory.
raw_data_train = pd.read_csv('au_train.csv')
# ROC curve and AUC tooling
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score
from matplotlib import pyplot
from matplotlib.pyplot import figure

# "No skill" baseline (majority class): a constant score of 0 for every
# sample in y_test.
ns_probs = [0] * len(y_test)
# Multi-layer perceptron classifier
from sklearn.neural_network import MLPClassifier

mlpclf = MLPClassifier(
    max_iter=100,
    activation='relu',
    solver='adam',
    random_state=1,
)
mlpclf.fit(X_train_scaled, y_train)
# Predicted class labels for the test features.
y_pred_mlp = mlpclf.predict(X_test_scaled)
# Keep only column 1 of the per-class probability matrix (the second entry
# of mlpclf.classes_ -- presumably the positive class; confirm).
mlp_probs = mlpclf.predict_proba(X_test_scaled)[:, 1]
@ajey091
ajey091 / income9.py
Last active February 19, 2020 17:49
# Support-vector classifier with an RBF kernel
from sklearn import svm

# probability=True is required so that predict_proba is available below.
svmclf = svm.SVC(kernel='rbf', probability=True)
svmclf.fit(X_train_scaled, y_train)
# Predicted class labels for the test features.
y_pred_svm = svmclf.predict(X_test_scaled)
# Keep only column 1 of the per-class probability matrix (the second entry
# of svmclf.classes_ -- presumably the positive class; confirm).
svm_probs = svmclf.predict_proba(X_test_scaled)[:, 1]
# Random forest classifier
from sklearn.ensemble import RandomForestClassifier

# Trees are capped at depth 10; the seed is fixed for repeatable results.
rfclf = RandomForestClassifier(max_depth=10, random_state=0)
rfclf.fit(X_train_scaled, y_train)
# Predicted class labels for the test features.
y_pred_rf = rfclf.predict(X_test_scaled)
# Keep only column 1 of the per-class probability matrix (the second entry
# of rfclf.classes_ -- presumably the positive class; confirm).
rf_probs = rfclf.predict_proba(X_test_scaled)[:, 1]