import sys

import pandas as pd
from imblearn.over_sampling import SMOTE
from mlxtend.classifier import EnsembleVoteClassifier
from sklearn import svm
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import train_test_split

verbose = False
ratio = 'auto'

print(sys.version)

# Training data. The file carries an .xls name but is parsed here as
# comma-separated text with the first row as the header.
input_file = "TrainDataBinaryClassification.xls"
df = pd.read_csv(input_file, header=0, sep=",")
print(df.head())

# Remove insignificant id column. `axis` is passed by keyword because
# pandas 2.0 made it keyword-only; the keyword form works on older releases too.
df.drop(['Id'], axis=1, inplace=True)
# List all column headers
print(list(df))

# Fill missing values with a sentinel so downstream estimators never see NaN.
df = df.fillna(-999)

# Every column except the target is a feature. Selecting by name (rather than
# "all but the last column") keeps the target out of the feature set even if
# the column order of the input file changes.
features = [column for column in df.columns if column != 'class']
print(features)

y1 = df['class']
x1 = df[features]

# Balance the classes before training.
# Option 1 (not used): SMOTEENN(ratio=ratio) followed by fit_sample(x1, y1).
# Option 2 (used): SMOTE with the SVM variant.
# NOTE(review): `kind='svm'` and `fit_sample` look like the older
# imbalanced-learn API - confirm the pinned version before upgrading.
sm = SMOTE(kind='svm')
x, y = sm.fit_sample(x1, y1)

# A single 70/30 split, used only to report the size of a hold-out set; the
# models below are trained on the full resampled data, as before.
pred_train, pred_test, tar_train, tar_test = train_test_split(x, y, test_size=0.3)
print('Shape of test data', pred_test.shape)

# Base estimators are handed to the ensemble unfitted: EnsembleVoteClassifier
# fits its own copies in clf.fit(), so fitting them here first would only
# repeat the work.
classifier = tree.DecisionTreeClassifier(criterion="entropy")
classifier2 = svm.SVC()
# 250 is the forest size. It was previously passed as n_jobs (the number of
# parallel workers), which left the number of trees at the library default.
classifier3 = RandomForestClassifier(n_estimators=250)

# Hard (majority) voting, with the random forest weighted slightly higher
# than the decision tree and the SVM.
clfs = [classifier, classifier2, classifier3]
clf = EnsembleVoteClassifier(clfs, voting='hard', weights=(4, 4, 5))
clf.fit(x, y)

# Score the unlabeled test set with the trained ensemble and export the result.
input_file = "TestDataTwoClass.xls"

# Read the file once. `df2` keeps the raw rows (including Id) for the export;
# `df` is the model-ready copy with Id removed and missing values filled with
# the same sentinel used for the training data.
df2 = pd.read_csv(input_file, header=0, sep=",")
df = df2.drop(['Id'], axis=1).fillna(-999)

# Select the same feature columns, in the same order, as used for training.
x = df[features]
predictions = clf.predict(x)
print('predictions')
for prediction in predictions:
    print(prediction)

df['class'] = predictions
df2['class'] = predictions

# NOTE(review): this prints the predicted column itself, not a tally -
# confirm whether a per-class count was intended.
print('count', df['class'])

# Only the identifier and the predicted label go into the results file.
header = ["Id", "class"]
df2.to_csv("Results_Binary_Class_Adulteration_Sep18_2.csv", sep=',', columns=header, index=False)