Last active
December 9, 2022 16:03
-
-
Save liannewriting/e90594eccf7c8033c824fd2d5881d068 to your computer and use it in GitHub Desktop.
xgboost python machine learning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns judged (by the author) to be less related to the target; they are
# removed before modelling.
cols_to_drop = [
    'duration',
    'emp.var.rate',
    'cons.price.idx',
    'cons.conf.idx',
    'euribor3m',
    'nr.employed',
]

# Drop those columns and, in the same pass, give the remaining ones more
# descriptive names. See the UCI Bank Marketing page
# (https://archive.ics.uci.edu/ml/datasets/bank+marketing) for field details.
df = df.drop(columns=cols_to_drop).rename(
    columns={
        'job': 'job_type',
        'default': 'default_status',
        'housing': 'housing_loan_status',
        'loan': 'personal_loan_status',
        'contact': 'contact_type',
        'month': 'contact_month',
        'day_of_week': 'contact_day_of_week',
        'campaign': 'num_contacts',
        'pdays': 'days_last_contact',
        'previous': 'previous_contacts',
        'poutcome': 'previous_outcome',
        'y': 'result',
    }
)

# Convert the target to numerical values ('yes' -> 1, 'no' -> 0).
# Series.map is used rather than Series.replace: replace on an object column
# relies on implicit downcasting to an integer dtype, which newer pandas
# releases deprecate.
# NOTE(review): with map, any label other than 'yes'/'no' becomes NaN instead
# of being left unchanged -- confirm the target only contains these two values.
df['result'] = df['result'].map({'yes': 1, 'no': 0})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment