This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the training and test sets from CSV files in the working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
# Report (rows, columns) of each frame as a quick sanity check on the load.
print('train data shape: ', train.shape, '\ntest data shape: ', test.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse the raw 'transaction time' column into datetimes so the `.dt`
# accessor can be used to derive calendar features from it.
train['transaction time'] = pd.to_datetime(train['transaction time'])
# Hour of the transaction. Note that `.dt.hour` is in the range 0-23.
train['hour'] = train['transaction time'].dt.hour
# Day-of-week name of the transaction. `.dt.weekday_name` was removed in
# pandas 1.0; `.dt.day_name()` is the supported replacement.
train['dayofweek_name'] = train['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday/Sunday, 0 otherwise.
train['is_weekend'] = np.where(
    train['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0
)
# Period of the day derived from the hour feature. pd.cut builds right-closed
# intervals that exclude the lowest edge by default, so the first edge is -1
# (not 0): otherwise hour 0 would fall outside every bin and become NaN.
# Hours 1-23 are binned exactly as with a first edge of 0.
bins = [-1, 4, 8, 12, 16, 20, 24]
period = ['Late Night', 'Early Morning', 'Morning', 'Noon', 'Eve', 'Night']
# The feature selection further down reads train['period_of_day'], so it has
# to be materialised here.
train['period_of_day'] = pd.cut(train['hour'], bins=bins, labels=period)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Amount moved by the most recent transaction: the difference between the
# current and the previous balance (positive = money in, negative = money out).
train['prev_tran'] = train['current bank amount'] - train['last bank amount']
# Label the movement: 'credit' when strictly positive, 'debit' otherwise
# (zero and missing differences are therefore labelled 'debit').
# Vectorised with np.where instead of a per-row Python loop.
train['credit_or_debit'] = np.where(train['prev_tran'] > 0, 'credit', 'debit')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns fed to the models, in the order they should appear in the frame.
feature_columns = [
    'current bank amount', 'last bank amount',
    'time taken (seconds)', 'most recent bank amount', 'account type',
    'age', 'occupation', 'credit card type',
    'account source verification', 'transaction source method',
    'account destination verification', 'dayofweek_name', 'is_weekend',
    'hour', 'prev_tran', 'credit_or_debit', 'period_of_day',
]
# Subset of the above that is one-hot encoded; every other selected column
# is passed through unchanged.
categorical_columns = [
    'account type',
    'occupation', 'credit card type', 'credit_or_debit',
    'account source verification', 'transaction source method',
    'account destination verification', 'dayofweek_name', 'period_of_day',
]
features = train[feature_columns]
# One-hot encode the categorical columns to build the design matrix X.
X = pd.get_dummies(data=features, columns=categorical_columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import model_selection | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | |
from sklearn.metrics import auc, roc_auc_score |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Candidate classifiers as (short name, estimator) pairs, each built with
# its default hyper-parameters. The short name is used when reporting scores.
models = [
    ('Lr', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('Knn', KNeighborsClassifier()),
    ('Decision_Tree', DecisionTreeClassifier()),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evaluate each model in turn: fit on the training split, then score the
# held-out split by ROC AUC computed from the predicted probability of the
# positive class (column 1 of predict_proba).
results = []  # ROC AUC per model, in the same order as `names`
names = []    # short model names, in evaluation order
for name, model in models:
    model.fit(X_train, y_train)
    score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    results.append(score)
    names.append(name)
    msg = "%s: %f " % (name, score)
    print(msg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apply the same feature engineering to the test set as to the training set.
test['transaction time'] = pd.to_datetime(test['transaction time'])
# Hour of the transaction. Note that `.dt.hour` is in the range 0-23.
test['hour'] = test['transaction time'].dt.hour
# Day-of-week name of the transaction. `.dt.weekday_name` was removed in
# pandas 1.0; `.dt.day_name()` is the supported replacement.
test['dayofweek_name'] = test['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday/Sunday, 0 otherwise.
test['is_weekend'] = np.where(
    test['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0
)
# Reuses the module-level `bins` and `period` so the test frame gets the
# same period-of-day categories as the training frame.
test['period_of_day'] = pd.cut(test['hour'], bins=bins, labels=period)
# Amount moved by the most recent transaction (current minus previous balance).
test['prev_tran'] = test['current bank amount'] - test['last bank amount']
# 'credit' when the movement is strictly positive, 'debit' otherwise.
test['credit_or_debit'] = np.where(test['prev_tran'] > 0, 'credit', 'debit')
OlderNewer