Skip to content

Instantly share code, notes, and snippets.

@Paulooh007
Paulooh007 / import.py
Created March 7, 2020 16:12
import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
@Paulooh007
Paulooh007 / read.py
Created March 7, 2020 16:39
read in data
# Load the training and test sets from CSV files in the working directory
# (train.csv first, then test.csv).
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))
# Report the shape of each frame as a quick sanity check.
print('train data shape: ', train.shape, '\ntest data shape: ', test.shape)
# Derive calendar features from the 'transaction time' column of `train`.
# The column is first parsed with pd.to_datetime so the .dt accessor works.
train['transaction time'] = pd.to_datetime(train['transaction time'])
train['hour'] = train['transaction time'].dt.hour  # hour of the day, 0-23
# Series.dt.weekday_name was removed in pandas 1.0; dt.day_name() is the
# replacement and yields the same English day names ('Monday', ...).
train['dayofweek_name'] = train['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday or Sunday, otherwise 0.
train['is_weekend'] = np.where(
    train['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0
)
# Bin edges and labels for turning 'hour' into a period of the day
# (six 4-hour periods, one label per bin).
# NOTE(review): pd.cut uses right-closed intervals by default, so with these
# edges the first bin is (0, 4] and hour 0 would map to NaN -- confirm
# whether include_lowest=True or right=False is intended wherever these
# bins are consumed.
bins = [0, 4, 8, 12, 16, 20, 24]
period = ['Late Night', 'Early Morning', 'Morning', 'Noon', 'Eve', 'Night']
@Paulooh007
Paulooh007 / feat_eng2.py
Created March 7, 2020 20:24
feat eng 2
# Amount most recently deposited (positive) or withdrawn (negative):
# the change from the last bank amount to the current one.
train['prev_tran'] = train['current bank amount'] - train['last bank amount']
# Label each row 'credit' when that change is strictly positive and 'debit'
# in every other case (zero, negative, or missing).
train['credit_or_debit'] = np.where(train['prev_tran'] > 0, 'credit', 'debit')
@Paulooh007
Paulooh007 / feat,py
Last active March 8, 2020 06:41
feat
# Columns of `train` used as model inputs, in their original order.
feature_columns = [
    'current bank amount',
    'last bank amount',
    'time taken (seconds)',
    'most recent bank amount',
    'account type',
    'age',
    'occupation',
    'credit card type',
    'account source verification',
    'transaction source method',
    'account destination verification',
    'dayofweek_name',
    'is_weekend',
    'hour',
    'prev_tran',
    'credit_or_debit',
    'period_of_day',
]

# Subset of the inputs that is one-hot encoded; the remaining columns are
# passed through to X unchanged.
categorical_columns = [
    'account type',
    'occupation',
    'credit card type',
    'credit_or_debit',
    'account source verification',
    'transaction source method',
    'account destination verification',
    'dayofweek_name',
    'period_of_day',
]

features = train[feature_columns]
# Design matrix: the selected features with the categorical ones expanded
# into indicator (dummy) columns.
X = pd.get_dummies(features, columns=categorical_columns)
@Paulooh007
Paulooh007 / algo.py
Created March 8, 2020 05:15
import algo
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import auc, roc_auc_score
@Paulooh007
Paulooh007 / algo_loop.py
Last active March 8, 2020 05:25
algo loop
# Candidate classifiers to compare, each paired with a short display name.
models = [
    ('Lr', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('Knn', KNeighborsClassifier()),
    ('Decision_Tree', DecisionTreeClassifier()),
]

# Evaluate each model in turn: fit on the training split, then score with
# ROC AUC on the held-out split. X_train, y_train, X_test and y_test must
# already be defined before this runs (they are not created in this snippet).
results = []  # ROC AUC per model, in the same order as `names`
names = []
for name, model in models:
    model.fit(X_train, y_train)
    # Column 1 of predict_proba is the probability of the second class in
    # model.classes_ -- presumably the positive label; confirm for this data.
    score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    results.append(score)
    names.append(name)
    msg = "%s: %f " % (name, score)
    print(msg)
@Paulooh007
Paulooh007 / test.py
Created March 8, 2020 05:57
transform test
# Apply to `test` the same calendar and amount features that were derived
# for `train`, so both frames carry the same engineered columns.
test['transaction time'] = pd.to_datetime(test['transaction time'])
test['hour'] = test['transaction time'].dt.hour  # hour of the day, 0-23
# Series.dt.weekday_name was removed in pandas 1.0; dt.day_name() is the
# replacement and yields the same English day names ('Monday', ...).
test['dayofweek_name'] = test['transaction time'].dt.day_name()
# 1 when the transaction falls on Saturday or Sunday, otherwise 0.
test['is_weekend'] = np.where(
    test['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0
)
# Bucket the hour into the named periods using the module-level `bins` and
# `period` lists defined with the training features.
# NOTE(review): pd.cut intervals are right-closed by default, so hour 0 is
# outside the first bin and becomes NaN here. Left unchanged to stay
# consistent with however `train` is binned -- fix both together (e.g.
# include_lowest=True or right=False) if midnight rows should be labelled.
test['period_of_day'] = pd.cut(test['hour'], bins=bins, labels=period)
# Amount most recently deposited (positive) or withdrawn (negative).
test['prev_tran'] = test['current bank amount'] - test['last bank amount']
#credit if the value is positive or debit if otherwise