# Okewunmi Paul Paulooh007

## import.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## import.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## read.py
# Load both CSV files (paths are relative to the working directory),
# train first and then test.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Print the (rows, columns) shape of each frame.
print(
    'train data shape: ', train.shape,
    '\ntest data shape: ', test.shape,
)

## feat_eng.py
# Derive calendar features on the train frame from the raw timestamp column.
# The column (TRANSACTION TIME) is first parsed with pd.to_datetime() so that
# the .dt accessor can be used on it.
train['transaction time'] = pd.to_datetime(train['transaction time'])
train['hour'] = train['transaction time'].dt.hour  # hour of the day, range 0 - 23
# Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
# replacement and returns the English day names by default.
train['dayofweek_name'] = train['transaction time'].dt.day_name()  # day of week of transaction
train['is_weekend'] = np.where(train['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)  # weekend or not

# Getting the period of the day from the hour feature
bins = [0, 4, 8, 12, 16, 20, 24]
period = ['Late Night', 'Early Morning', 'Morning', 'Noon', 'Eve', 'Night']
# The feature selection further down reads train['period_of_day'], so the
# column has to be created here. pd.cut intervals are right-closed by default,
# which would leave hour 0 outside (0, 4] as NaN; include_lowest=True puts it
# in the first bin without moving any other hour.
train['period_of_day'] = pd.cut(train['hour'], bins=bins, labels=period, include_lowest=True)

## feat_eng2.py
# Difference between the current and the last bank amount of each row.
balance_now = train['current bank amount']
balance_before = train['last bank amount']
train['prev_tran'] = balance_now - balance_before
# Label a strictly positive difference 'credit'; everything else
# (zero, negative or missing) is labelled 'debit'.
train['credit_or_debit'] = np.where(train['prev_tran'] > 0, 'credit', 'debit')

## feat.py
# Columns of train that are kept as model inputs.
# NOTE(review): every listed column, including the engineered ones, must
# already exist on train or this selection raises KeyError.
feature_columns = [
    'current bank amount', 'last bank amount',
    'time taken (seconds)', 'most recent bank amount', 'account type',
    'age', 'occupation', 'credit card type',
    'account source verification', 'transaction source method',
    'account destination verification', 'dayofweek_name', 'is_weekend',
    'hour', 'prev_tran', 'credit_or_debit', 'period_of_day',
]
# Subset of the columns above that is one-hot encoded; the remaining
# columns are carried over by get_dummies untouched.
categorical_columns = [
    'account type',
    'occupation', 'credit card type', 'credit_or_debit',
    'account source verification', 'transaction source method',
    'account destination verification', 'dayofweek_name', 'period_of_day',
]

features = train[feature_columns]
X = pd.get_dummies(data=features, columns=categorical_columns)

## algo.py
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import auc, roc_auc_score

## algo_loop.py
# (label, estimator) pairs to compare; every estimator is constructed with
# its default arguments.
models = [
    ('Lr', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('Knn', KNeighborsClassifier()),
    ('Decision_Tree', DecisionTreeClassifier()),
]

## algoloop.py
# Fit each model in turn and report its ROC AUC.
# NOTE(review): X_train, y_train, X_test and y_test are not created in the
# visible code -- presumably a split of X made elsewhere; confirm.
results, names = [], []
for name, model in models:
    model.fit(X_train, y_train)
    # Second column of predict_proba -- presumably the positive class.
    positive_scores = model.predict_proba(X_test)[:, 1]
    score = roc_auc_score(y_test, positive_scores)
    results.append(score)
    names.append(name)
    msg = "%s: %f " % (name, score)
    print(msg)

## test.py
# Build on the test frame the same engineered columns that the train-side
# feature list uses. bins and period are the module-level lists defined in
# the train feature-engineering section.
test['transaction time'] = pd.to_datetime(test['transaction time'])
test['hour'] = test['transaction time'].dt.hour  # hour of the day, range 0 - 23
# Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
# replacement and returns the English day names by default.
test['dayofweek_name'] = test['transaction time'].dt.day_name()  # day of week of transaction
test['is_weekend'] = np.where(test['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)  # weekend or not

# pd.cut intervals are right-closed by default, which would leave hour 0
# outside (0, 4] as NaN; include_lowest=True puts it in the first bin.
test['period_of_day'] = pd.cut(test['hour'], bins=bins, labels=period, include_lowest=True)

# Getting the recent amount withdrawn or deposited
test['prev_tran'] = test['current bank amount'] - test['last bank amount']
# credit if the value is positive or debit if otherwise
# (this step was announced by the comment but missing; it mirrors the
# train-side 'credit_or_debit' column that the feature list requires)
test['credit_or_debit'] = np.where(test['prev_tran'] > 0, 'credit', 'debit')
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	# Load both CSV files (paths are relative to the working directory),
	# train first and then test.
	train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))
	# Print the (rows, columns) shape of each frame.
	print(
		'train data shape: ', train.shape,
		'\ntest data shape: ', test.shape,
	)
	# Derive calendar features on the train frame from the raw timestamp column.
	# The column (TRANSACTION TIME) is first parsed with pd.to_datetime() so that
	# the .dt accessor can be used on it.
	train['transaction time'] = pd.to_datetime(train['transaction time'])
	train['hour'] = train['transaction time'].dt.hour  # hour of the day, range 0 - 23
	# Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
	# replacement and returns the English day names by default.
	train['dayofweek_name'] = train['transaction time'].dt.day_name()  # day of week of transaction
	train['is_weekend'] = np.where(train['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)  # weekend or not

	# Getting the period of the day from the hour feature
	bins = [0, 4, 8, 12, 16, 20, 24]
	period = ['Late Night', 'Early Morning', 'Morning', 'Noon', 'Eve', 'Night']
	# The feature selection further down reads train['period_of_day'], so the
	# column has to be created here. pd.cut intervals are right-closed by default,
	# which would leave hour 0 outside (0, 4] as NaN; include_lowest=True puts it
	# in the first bin without moving any other hour.
	train['period_of_day'] = pd.cut(train['hour'], bins=bins, labels=period, include_lowest=True)
	# Difference between the current and the last bank amount of each row.
	balance_now = train['current bank amount']
	balance_before = train['last bank amount']
	train['prev_tran'] = balance_now - balance_before
	# Label a strictly positive difference 'credit'; everything else
	# (zero, negative or missing) is labelled 'debit'.
	train['credit_or_debit'] = np.where(train['prev_tran'] > 0, 'credit', 'debit')
	# Columns of train that are kept as model inputs.
	# NOTE(review): every listed column, including the engineered ones, must
	# already exist on train or this selection raises KeyError.
	feature_columns = [
		'current bank amount', 'last bank amount',
		'time taken (seconds)', 'most recent bank amount', 'account type',
		'age', 'occupation', 'credit card type',
		'account source verification', 'transaction source method',
		'account destination verification', 'dayofweek_name', 'is_weekend',
		'hour', 'prev_tran', 'credit_or_debit', 'period_of_day',
	]
	# Subset of the columns above that is one-hot encoded; the remaining
	# columns are carried over by get_dummies untouched.
	categorical_columns = [
		'account type',
		'occupation', 'credit card type', 'credit_or_debit',
		'account source verification', 'transaction source method',
		'account destination verification', 'dayofweek_name', 'period_of_day',
	]
	features = train[feature_columns]
	X = pd.get_dummies(data=features, columns=categorical_columns)
	from sklearn import model_selection
	from sklearn.linear_model import LogisticRegression
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
	from sklearn.metrics import auc, roc_auc_score
	# (label, estimator) pairs to compare; every estimator is constructed with
	# its default arguments.
	models = [
		('Lr', LogisticRegression()),
		('LDA', LinearDiscriminantAnalysis()),
		('Knn', KNeighborsClassifier()),
		('Decision_Tree', DecisionTreeClassifier()),
	]
	# evaluate each model in turn: fit it, then score it by ROC AUC
	# NOTE(review): X_train, y_train, X_test and y_test are not created in the
	# visible code -- presumably a split of X made elsewhere; confirm.
	results = []
	names = []
	for name, model in models:
		# The loop body had lost its indentation and sat level with the
		# `for` header, which Python rejects; it is nested again here.
		model.fit(X_train, y_train)
		# Second column of predict_proba -- presumably the positive class.
		score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
		results.append(score)
		names.append(name)
		msg = "%s: %f " % (name, score)
		print(msg)
	# Build on the test frame the same engineered columns that the train-side
	# feature list uses. bins and period are the lists defined in the train
	# feature-engineering section.
	test['transaction time'] = pd.to_datetime(test['transaction time'])
	test['hour'] = test['transaction time'].dt.hour  # hour of the day, range 0 - 23
	# Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
	# replacement and returns the English day names by default.
	test['dayofweek_name'] = test['transaction time'].dt.day_name()  # day of week of transaction
	test['is_weekend'] = np.where(test['dayofweek_name'].isin(['Sunday', 'Saturday']), 1, 0)  # weekend or not

	# pd.cut intervals are right-closed by default, which would leave hour 0
	# outside (0, 4] as NaN; include_lowest=True puts it in the first bin.
	test['period_of_day'] = pd.cut(test['hour'], bins=bins, labels=period, include_lowest=True)

	# Getting the recent amount withdrawn or deposited
	test['prev_tran'] = test['current bank amount'] - test['last bank amount']
	# credit if the value is positive or debit if otherwise
	# (this step was announced by the comment but missing; it mirrors the
	# train-side 'credit_or_debit' column that the feature list requires)
	test['credit_or_debit'] = np.where(test['prev_tran'] > 0, 'credit', 'debit')