Skip to content

Instantly share code, notes, and snippets.

@tomasonjo
Last active May 25, 2022 12:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomasonjo/46d842449316e429c3163abeec937b7a to your computer and use it in GitHub Desktop.
Save tomasonjo/46d842449316e429c3163abeec937b7a to your computer and use it in GitHub Desktop.
def feature_importance(columns, classifier):
    """Rank a fitted model's features by importance, highest first.

    Args:
        columns: Iterable of feature names, given in the same order as the
            entries of ``classifier.feature_importances_``.
        classifier: Object exposing a ``feature_importances_`` sequence
            (one numeric score per feature).

    Returns:
        A ``pandas.DataFrame`` with the columns ``feature`` and ``value``,
        sorted by ``value`` in descending order. Features with equal
        importance keep their input order.

    Raises:
        ValueError: If the number of names differs from the number of
            importance scores.
    """
    names = list(columns)
    importances = list(classifier.feature_importances_)
    # zip() would silently drop the surplus entries, hiding a mismatch
    # between the supplied names and the model's features.
    if len(names) != len(importances):
        raise ValueError(
            f"got {len(names)} column names for "
            f"{len(importances)} feature importances"
        )

    # sorted() is stable even with reverse=True, so ties stay in input order.
    ranked = sorted(
        zip(names, importances),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return pd.DataFrame(
        data={
            'feature': [name for name, _ in ranked],
            'value': [score for _, score in ranked],
        }
    )
def evaluate(df):
    """Train a random-forest classifier on ``df`` and report its quality.

    The ``fraudRisk`` column is used as the target and every other column
    as a feature. The data is split 80/20 (stratified on the target), the
    training part is oversampled with SMOTE, and a random forest is fitted
    on it. All random seeds are fixed to 42.

    Side effects: draws a confusion matrix (normalized over the true
    labels) and a ROC curve for the held-out test set, and prints the
    feature-importance table produced by ``feature_importance``.

    Args:
        df: ``pandas.DataFrame`` containing a ``fraudRisk`` column.
            NOTE(review): the remaining columns are presumably all numeric
            features - confirm against the caller.

    Returns:
        None.
    """
    df_X = df.drop('fraudRisk', axis=1)
    X = df_X.values
    y = LabelBinarizer().fit_transform(df[['fraudRisk']].values)
    # For a two-class target LabelBinarizer returns an (n, 1) column;
    # the estimators below expect a 1-D target and warn about a column
    # vector, so flatten it. Multi-column output is left untouched.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    # Test/train data split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Oversample only the training data so the test set keeps the
    # original class distribution.
    oversample = SMOTE(random_state=42)
    X_train, y_train = oversample.fit_resample(X_train, y_train)

    # Random forest classification
    model = RandomForestClassifier(
        n_estimators=500,
        random_state=42,
        max_depth=5,
        bootstrap=True,
        class_weight='balanced',
    )
    model = model.fit(X_train, y_train)

    # Evaluate the model on the untouched test split
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, normalize='true')
    RocCurveDisplay.from_estimator(model, X_test, y_test, name="RF Model")
    print(feature_importance(df_X.columns.to_list(), model))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment