# dhanush/predict.py

## predict.py
import pandas
import math
from numpy.random import permutation
from sklearn import svm

# Feature columns (column names in the CSV file) used as inputs for prediction.
# NOTE(review): the list is empty here - presumably meant to be filled in
# with real column names before the model is fitted; confirm.
x_cols = []
# Column holding the value to be predicted.
y_cols = ['role']

# Load the labelled data into a pandas DataFrame.
training_data_frame = pandas.read_csv('train.csv')

# Shuffle the row labels, then hold out the first quarter as test data and
# keep the remaining three quarters as training data.
random_indices = permutation(training_data_frame.index)
test_cutoff = len(training_data_frame) // 4
# Slice from the start of the shuffled labels: starting at 1 would leave the
# first shuffled row in neither subset.
test_data = training_data_frame.loc[random_indices[:test_cutoff]]
train_data = training_data_frame.loc[random_indices[test_cutoff:]]

## Method to predict using SVM
def predict_tech(predict_data):
    """Fit a linear-kernel SVM on the training split and classify ``predict_data``.

    A new classifier is created and fitted on ``train_data`` every time this
    function is called.

    Args:
        predict_data: Feature rows to classify; passed unchanged to the
            classifier's ``predict`` and ``predict_proba`` methods.

    Returns:
        A ``(predictions, predict_probab)`` tuple holding the results of
        ``predict`` and ``predict_proba`` respectively.
    """
    features = train_data[x_cols]
    # Flatten the target frame into the 1-D array handed to ``fit``.
    labels = train_data[y_cols].values.ravel()

    model = svm.SVC(kernel='linear', probability=True)
    model.fit(features, labels)

    return model.predict(predict_data), model.predict_proba(predict_data)


# Take the first held-out row (feature columns only) as a one-row sample.
test = test_data[x_cols].iloc[:1]

# Run the sample through the classifier.
predictions, predict_probab = predict_tech(test)

# Print `predictions` and `predict_probab` to see the predicted labels and their probabilities.
	import pandas
	import math
	from numpy.random import permutation
	from sklearn import svm

	# Feature columns (column names in the CSV file) used as inputs for prediction.
	# NOTE(review): the list is empty here - presumably meant to be filled in
	# with real column names before the model is fitted; confirm.
	x_cols = []
	# Column holding the value to be predicted.
	y_cols = ['role']

	# Load the labelled data into a pandas DataFrame.
	training_data_frame = pandas.read_csv('train.csv')

	# Shuffle the row labels, then hold out the first quarter as test data and
	# keep the remaining three quarters as training data.
	random_indices = permutation(training_data_frame.index)
	test_cutoff = len(training_data_frame) // 4
	# Slice from the start of the shuffled labels: starting at 1 would leave the
	# first shuffled row in neither subset.
	test_data = training_data_frame.loc[random_indices[:test_cutoff]]
	train_data = training_data_frame.loc[random_indices[test_cutoff:]]

	## Method to predict using SVM
	def predict_tech(predict_data):
		"""Fit a linear-kernel SVM on the training split and classify ``predict_data``.

		A new classifier is created and fitted on ``train_data`` every time
		this function is called.

		Args:
			predict_data: Feature rows to classify; passed unchanged to the
				classifier's ``predict`` and ``predict_proba`` methods.

		Returns:
			A ``(predictions, predict_probab)`` tuple holding the results of
			``predict`` and ``predict_proba`` respectively.
		"""
		clf = svm.SVC(probability=True,kernel='linear')
		# Flatten the target frame into the 1-D array handed to ``fit``.
		clf.fit(train_data[x_cols], train_data[y_cols].values.ravel())
		predictions = clf.predict(predict_data)
		predict_probab = clf.predict_proba(predict_data)
		return predictions,predict_probab


	# Take the first held-out row (feature columns only) as a one-row sample.
	test = test_data[x_cols].iloc[:1]

	# Run the sample through the classifier.
	predictions, predict_probab = predict_tech(test)

	# Print `predictions` and `predict_probab` to see the predicted labels and their probabilities.