# liannewriting/transform_data.py

## transform_data.py
# Columns removed before modelling; the original author judged these to be
# less related to the target.
cols_to_drop = [
    'duration',
    'emp.var.rate',
    'cons.price.idx',
    'cons.conf.idx',
    'euribor3m',
    'nr.employed',
]

# More understandable replacements for the terse source column names. Please
# read the UCI page (https://archive.ics.uci.edu/ml/datasets/bank+marketing)
# for details on each field.
column_renames = {
    'job': 'job_type',
    'default': 'default_status',
    'housing': 'housing_loan_status',
    'loan': 'personal_loan_status',
    'contact': 'contact_type',
    'month': 'contact_month',
    'day_of_week': 'contact_day_of_week',
    'campaign': 'num_contacts',
    'pdays': 'days_last_contact',
    'previous': 'previous_contacts',
    'poutcome': 'previous_outcome',
    'y': 'result',
}

# Drop first, then rename, so the rename only touches columns that are kept.
df = df.drop(columns=cols_to_drop).rename(columns=column_renames)

# Convert the target to numerical values ('yes' -> 1, 'no' -> 0).
# ``Series.map`` is used instead of ``Series.replace`` because replacing
# strings with ints depends on implicit object-to-int downcasting, which
# recent pandas releases deprecate (FutureWarning); ``map`` produces an
# integer column directly.
# NOTE: any label other than 'yes'/'no' becomes NaN here instead of being
# left unchanged.
df['result'] = df['result'].map({'yes': 1, 'no': 0})
	# NOTE(review): this tab-indented fragment performs the same drop / rename /
	# convert steps as the column-0 statements directly above it, and no enclosing
	# definition is visible here -- confirm whether it is a leftover duplicate.
	# List the columns to drop; the original author judged them to be less related to the target.
	cols_to_drop = ['duration', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed']
	# Drop those columns and, in the same chained call, rename the remaining ones so they are
	# understandable. Please read the UCI page (https://archive.ics.uci.edu/ml/datasets/bank+marketing) for details.
	df = df.drop(columns=cols_to_drop).rename(columns={'job': 'job_type', 'default': 'default_status',
	'housing': 'housing_loan_status', 'loan': 'personal_loan_status',
	'contact': 'contact_type', 'month': 'contact_month',
	'day_of_week': 'contact_day_of_week', 'campaign': 'num_contacts',
	'pdays': 'days_last_contact', 'previous': 'previous_contacts',
	'poutcome': 'previous_outcome',
	'y': 'result'
	})
	# Convert the target to numerical values: 'yes' -> 1, 'no' -> 0.
	df['result'] = df['result'].replace({'yes': 1, 'no': 0})