Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
def getdata(csvFile):
    """Load a CSV file into a pandas DataFrame.

    Args:
        csvFile: Whatever ``pandas.read_csv`` accepts as its first
            argument (for example a path to a ``.csv`` file).

    Returns:
        The ``DataFrame`` produced by ``pd.read_csv`` with its default
        options.
    """
    return pd.read_csv(csvFile)
import pandas as pd
def getdata(csvFile):
    """Read ``csvFile`` with pandas and return the parsed table.

    Args:
        csvFile: First argument forwarded to ``pandas.read_csv``
            (typically a file path).

    Returns:
        The ``DataFrame`` returned by ``pd.read_csv`` using default
        parsing options.
    """
    dat = pd.read_csv(csvFile)
    return dat
def split_data_and_label(dat, label_col='Survived'):
    """Separate the label column from the remaining columns.

    Args:
        dat: DataFrame that contains the label column.
        label_col: Name of the column holding the labels. Defaults to
            ``'Survived'``, the name that used to be hard-coded here.

    Returns:
        A ``(label, dat)`` tuple: ``label`` is the ``.values`` of the
        label column, and ``dat`` is a new frame without that column.
        The frame passed in is left unchanged.

    Raises:
        KeyError: If ``label_col`` is not a column of ``dat``.
    """
    label = dat[label_col].values
    features = dat.drop(columns=label_col)
    return label, features
def feature_extraction(featMat):
    """Keep the numeric columns of ``featMat`` and fill missing values with 0.

    Args:
        featMat: DataFrame of raw columns.

    Returns:
        A new DataFrame holding only the numeric and boolean columns of
        ``featMat`` (in their original order), with every missing value
        replaced by 0.
    """
    # Public replacement for the private ``_get_numeric_data()`` helper;
    # 'bool' is listed explicitly because 'number' alone excludes it.
    numeric = featMat.select_dtypes(include=['number', 'bool'])
    return numeric.fillna(0)
from sklearn.ensemble import GradientBoostingClassifier
def trainclassifer(feat, label):
    """Fit a ``GradientBoostingClassifier`` on the given features and labels.

    The classifier is constructed with no arguments, i.e. with the
    library's default settings.

    Args:
        feat: Feature matrix passed to ``fit`` as the training input.
        label: Target values passed to ``fit``.

    Returns:
        The fitted ``GradientBoostingClassifier`` instance.
    """
    mod = GradientBoostingClassifier()
    mod.fit(feat, label)
    return mod
def evaluation_accuracy(prediction, actual):
    """Return the fraction of predictions that equal the actual labels.

    Values are compared pair by pair, so plain Python sequences work as
    well as arrays. (Comparing two lists with ``==`` yields a single
    bool, which ``sum`` cannot consume.)

    Args:
        prediction: Sequence of predicted labels.
        actual: Sequence of true labels, the same length as
            ``prediction``.

    Returns:
        The share of positions where the two sequences agree, as a
        float between 0.0 and 1.0.

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    total = len(prediction)
    if total != len(actual):
        raise ValueError(
            'prediction and actual must have the same length '
            '(got %d and %d)' % (total, len(actual))
        )
    if total == 0:
        raise ValueError('cannot compute accuracy of empty input')
    hits = sum(1 for p, a in zip(prediction, actual) if p == a)
    # float() keeps this a true division on Python 2 as well.
    return hits / float(total)
@winn
winn / pipeline.py
Last active October 12, 2019 09:41
def pipeline(trainFile='train.csv', testFile='test.csv'):
    """Load the train and test CSVs, extract features, and fit the classifier.

    Args:
        trainFile: CSV used for training. Defaults to ``'train.csv'``.
        testFile: CSV used as the test set. Defaults to ``'test.csv'``.
            It goes through ``split_data_and_label`` like the training
            file does.

    Returns:
        The classifier returned by ``trainclassifer`` after fitting on
        the training features and labels.
    """
    trainLabel, trainDat = split_data_and_label(getdata(trainFile))
    trainFeat = feature_extraction(trainDat)

    # NOTE(review): the test labels and features are prepared here but
    # nothing in this function consumes them yet.
    testLabel, testDat = split_data_and_label(getdata(testFile))
    testFeat = feature_extraction(testDat)

    mod = trainclassifer(trainFeat, trainLabel)
    return mod
def converttobinary(dat, col, attribute):
    """Add a 0/1 indicator column marking rows where ``col`` equals ``attribute``.

    The new column is named after ``attribute`` itself and is assigned
    directly into ``dat``, so the frame passed in is modified in place.

    Args:
        dat: DataFrame to extend.
        col: Name of the column to test.
        attribute: Value to look for in ``col``; also used as the name
            of the new indicator column.

    Returns:
        The same ``dat`` object, now carrying the indicator column.
    """
    dat[attribute] = dat[col].eq(attribute).astype('int')
    return dat
def feature_extraction(featMat):
    """Encode ``Sex`` as a 0/1 ``male`` column, keep numeric columns, fill gaps with 0.

    Args:
        featMat: DataFrame of raw columns; it must contain a ``'Sex'``
            column. ``converttobinary`` assigns the new ``'male'``
            column into this frame, so the caller's frame gains that
            column too.

    Returns:
        A new DataFrame holding only the numeric and boolean columns
        (including the added ``'male'`` indicator), with every missing
        value replaced by 0.
    """
    featMat = converttobinary(featMat, 'Sex', 'male')
    # Public replacement for the private ``_get_numeric_data()`` helper;
    # 'bool' is listed explicitly because 'number' alone excludes it.
    numeric = featMat.select_dtypes(include=['number', 'bool'])
    return numeric.fillna(0)
def createdummie(dat, col):
    """Append indicator columns for ``col``, leaving out the last category.

    Args:
        dat: DataFrame containing ``col``.
        col: Name of the categorical column to expand.

    Returns:
        A new DataFrame made of every column of ``dat`` (``col`` itself
        is kept) followed by the columns ``pd.get_dummies`` produces
        for ``col``, minus the last one.
    """
    indicators = pd.get_dummies(dat[col])
    # Drop the final indicator column: for rows with a known category
    # it is implied by all the remaining indicators being zero.
    indicators = indicators.iloc[:, :-1]
    return pd.concat([dat, indicators], axis=1)