Skip to content

Instantly share code, notes, and snippets.

Paulooh007 /
Created March 7, 2020 16:12
import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Paulooh007 /
Created March 7, 2020 16:39
read in data
# Load the training and test sets from CSV files in the working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
# Print both shapes as a quick sanity check on what was loaded.
print(
    'train data shape: ', train.shape,
    '\ntest data shape: ', test.shape,
)
# Parse the 'transaction time' column into pandas datetimes so the .dt
# accessor can be used for the derived features below.
train['transaction time'] = pd.to_datetime(train['transaction time'])
train['hour'] = train['transaction time'].dt.hour  # hour of the day, 0-23
# Series.dt.weekday_name was removed in pandas 1.0; dt.day_name() returns the
# same English day names ('Monday' ... 'Sunday').
train['dayofweek_name'] = train['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday or Sunday, otherwise 0.
train['is_weekend'] = np.where(train['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)
# Bin edges and labels for deriving the period of the day from the hour.
# pd.cut uses right-closed intervals by default, so the lowest edge is -1
# rather than 0: hour 0 then lands in 'Late Night' (hours 0-4) instead of
# falling outside every interval and becoming NaN. The other buckets are
# (4, 8], (8, 12], (12, 16], (16, 20] and (20, 24].
bins = [-1, 4, 8, 12, 16, 20, 24]
period = ['Late Night', 'Early Morning', 'Morning', 'Noon', 'Eve', 'Night']
Paulooh007 /
Created March 7, 2020 20:24
feat eng 2
# Signed difference between the current and the previous bank amount.
train['prev_tran'] = train['current bank amount'].sub(train['last bank amount'])
# Strictly positive differences are labelled 'credit'; everything else
# (zero, negative or missing) is labelled 'debit'.
train['credit_or_debit'] = np.where(train['prev_tran'] > 0, 'credit', 'debit')
Paulooh007 / feat,py
Last active March 8, 2020 06:41
features = train[['current bank amount', 'last bank amount',
'time taken (seconds)', 'most recent bank amount', 'account type',
'age', 'occupation', 'credit card type',
'account source verification', 'transaction source method',
'account destination verification', 'dayofweek_name', 'is_weekend',
# One-hot encode the categorical columns of `features`; columns that are not
# listed here are carried over to X unchanged.
X = pd.get_dummies(
    features,
    columns=[
        'account type',
        'occupation',
        'credit card type',
        'credit_or_debit',
        'account source verification',
        'transaction source method',
        'account destination verification',
        'dayofweek_name',
        'period_of_day',
    ],
)
Paulooh007 /
Created March 8, 2020 05:15
import algo
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import auc, roc_auc_score
Paulooh007 /
Last active March 8, 2020 05:25
algo loop
# Candidate classifiers, each paired with a short display name. All of them
# are created with their default hyper-parameters.
models = [
    ('Lr', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('Knn', KNeighborsClassifier()),
    ('Decision_Tree', DecisionTreeClassifier()),
]
# evaluate each model in turn
# (`results` and `names` start out empty; presumably they collect the
# per-model output of the evaluation that follows.)
results, names = [], []
# Fit every candidate model and score it with ROC AUC on the held-out data.
# X_train, y_train, X_test and y_test must already exist when this runs.
# NOTE(review): the loop header arrived garbled as
# "for name, model in models:, y_train)"; the fit call below is reconstructed
# from that fragment -- confirm it against the original snippet.
for name, model in models:
    model.fit(X_train, y_train)
    # roc_auc_score needs a score per sample, so take column 1 of
    # predict_proba: the probability of the second class in model.classes_.
    score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    msg = "%s: %f " % (name, score)
Paulooh007 /
Created March 8, 2020 05:57
transform test
# Derive the time-based features on the test set.
# Parse the 'transaction time' column into pandas datetimes so the .dt
# accessor can be used below.
test['transaction time'] = pd.to_datetime(test['transaction time'])
test['hour'] = test['transaction time'].dt.hour  # hour of the day, 0-23
# Series.dt.weekday_name was removed in pandas 1.0; dt.day_name() returns the
# same English day names ('Monday' ... 'Sunday').
test['dayofweek_name'] = test['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday or Sunday, otherwise 0.
test['is_weekend'] = np.where(test['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)
# Bucket the hour using the module-level `bins` edges and `period` labels.
# NOTE(review): pd.cut intervals are right-closed by default, so an hour equal
# to the lowest edge in `bins` is left as NaN; keep this binning identical to
# the one applied to the training data.
test['period_of_day'] = pd.cut(test['hour'], bins=bins, labels=period)
# Signed difference between the current and the previous bank amount.
test['prev_tran'] = test['current bank amount'] - test['last bank amount']
#credit if the value is positive or debit if otherwise