# Rohith Gandhi G (grohith327)

## sentiment_5.py
import numpy as np
from sklearn.model_selection import train_test_split

# One-hot encode the labels in `y`: a value equal to 0 becomes [1, 0],
# every other value becomes [0, 1]. The result is stored as a NumPy array.
Y = np.array([[1, 0] if val == 0 else [0, 1] for val in y])

## Spam_Class_1.py
import pandas as pd
import numpy as np

# Load the CSV at a fixed local path into a DataFrame.
file_path = '/Users/rohith/Documents/Datasets/SMS_Spam/spam.csv'
df = pd.read_csv(file_path)

# Pull out the two columns used afterwards.
# NOTE(review): presumably 'v1' is the class tag and 'v2' the message body —
# confirm against the CSV header.
out, text = df['v1'], df['v2']

# Starts empty; nothing in this snippet fills it.
label = []

## Spam_Class_2.py
from sklearn.utils import shuffle
# `sklearn.cross_validation` was removed from scikit-learn (0.20+);
# `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Shuffle texts and labels with the same permutation so pairs stay aligned.
text, label = shuffle(text, label)

# Keep 90% of the samples for training; the remainder is the test split.
x_train, x_test, y_train, y_test = train_test_split(text, label, train_size=0.9)

## Spam_Class_3.py
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Token counts: the vectorizer is fitted on the training texts only and then
# applied unchanged to the test texts. Undecodable bytes are ignored.
count_vect = CountVectorizer(decode_error='ignore')
x_train_count = count_vect.fit_transform(x_train)
x_test_count = count_vect.transform(x_test)

# TF-IDF weighting: fitted on the training counts, reused for the test counts.
tfidf_trans = TfidfTransformer()
x_train_tfidf = tfidf_trans.fit_transform(x_train_count)
x_test_tfidf = tfidf_trans.transform(x_test_count)

## Spam_Class_4.py
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Train an XGBoost classifier (n_estimators=200) on the TF-IDF training matrix.
clf = XGBClassifier(n_estimators=200)
clf.fit(x_train_tfidf, y_train)

# Predict the test split and print the accuracy against the true labels.
y_pred = clf.predict(x_test_tfidf)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

## Logistic_2.py
from sklearn.utils import shuffle
# `sklearn.cross_validation` was removed from scikit-learn (0.20+);
# `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
import numpy as np

# Shuffle features and targets with the same permutation.
X, Y = shuffle(X, Y)

# Containers for the train/test partitions; this snippet leaves them empty.
x_train = []
y_train = []
x_test = []
y_test = []

## Logistic_3.py
## Logistic Regression
import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is negated and passed to ``np.exp``, so NumPy arrays are
    processed element-wise and scalars yield a scalar.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# NOTE(review): alpha is presumably the gradient-descent learning rate — confirm.
alpha = 0.0001

# m is the number of rows of theta_0, a column vector initialised to zeros.
m = 90
theta_0 = np.zeros(shape=(m, 1))

## Logistic_4.py
from sklearn.metrics import accuracy_score

# Take columns 0..3 of the test matrix, one variable per column.
test_x_1, test_x_2, test_x_3, test_x_4 = (x_test[:, col] for col in range(4))

# Wrap the first three columns with np.array; test_x_4 stays as the raw slice.
test_x_1, test_x_2, test_x_3 = (
    np.array(column) for column in (test_x_1, test_x_2, test_x_3)
)

## Logistic_5.py
import matplotlib.pyplot as plt

# Turn the recorded cost values into a column vector. Using -1 lets NumPy infer
# the row count, so the history no longer has to contain exactly 10000 entries
# (for 10000 entries the resulting shape is still (10000, 1)).
cost_func = np.array(cost_func).reshape(-1, 1)

# Plot each cost value against its position in the sequence.
plt.plot(range(len(cost_func)), cost_func)

## Logistic_6.py
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# Fit scikit-learn's LogisticRegression with default settings on the training split.
clf = LogisticRegression()
clf.fit(x_train, y_train)

# Predict the test split and print the accuracy against the true labels.
y_pred = clf.predict(x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))
	import numpy as np
	from sklearn.model_selection import train_test_split

	# One-hot encode the labels in `y`: a value equal to 0 becomes [1, 0],
	# every other value becomes [0, 1]. Written as a single expression because
	# the flattened loop had lost its nested indentation and could not parse.
	Y = np.array([[1, 0] if val == 0 else [0, 1] for val in y])
	import pandas as pd
	import numpy as np

	# Load the CSV at a fixed local path into a DataFrame.
	file_path = '/Users/rohith/Documents/Datasets/SMS_Spam/spam.csv'
	df = pd.read_csv(file_path)

	# Pull out the two columns used afterwards.
	# NOTE(review): presumably 'v1' is the class tag and 'v2' the message body —
	# confirm against the CSV header.
	out, text = df['v1'], df['v2']

	# Starts empty; nothing in this snippet fills it.
	label = []
	from sklearn.utils import shuffle
	# `sklearn.cross_validation` was removed from scikit-learn (0.20+);
	# `train_test_split` is provided by `sklearn.model_selection`.
	from sklearn.model_selection import train_test_split

	# Shuffle texts and labels with the same permutation so pairs stay aligned.
	text, label = shuffle(text, label)

	# Keep 90% of the samples for training; the remainder is the test split.
	x_train, x_test, y_train, y_test = train_test_split(text, label, train_size=0.9)
	from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

	# Token counts: the vectorizer is fitted on the training texts only and then
	# applied unchanged to the test texts. Undecodable bytes are ignored.
	count_vect = CountVectorizer(decode_error='ignore')
	x_train_count = count_vect.fit_transform(x_train)
	x_test_count = count_vect.transform(x_test)

	# TF-IDF weighting: fitted on the training counts, reused for the test counts.
	tfidf_trans = TfidfTransformer()
	x_train_tfidf = tfidf_trans.fit_transform(x_train_count)
	x_test_tfidf = tfidf_trans.transform(x_test_count)
	from xgboost import XGBClassifier
	from sklearn.metrics import accuracy_score

	# Train an XGBoost classifier (n_estimators=200) on the TF-IDF training matrix.
	clf = XGBClassifier(n_estimators=200)
	clf.fit(x_train_tfidf, y_train)

	# Predict the test split and print the accuracy against the true labels.
	y_pred = clf.predict(x_test_tfidf)
	print(accuracy_score(y_true=y_test, y_pred=y_pred))
	## Logistic Regression
	import numpy as np

	def sigmoid(x):
		"""Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

		``x`` is negated and passed to ``np.exp``, so NumPy arrays are
		processed element-wise and scalars yield a scalar.
		"""
		# The body is indented one level below `def`; the flattened copy had
		# the return at the same level, leaving the function without a body.
		return 1 / (1 + np.exp(-x))

	# NOTE(review): alpha is presumably the gradient-descent learning rate — confirm.
	alpha = 0.0001

	# m is the number of rows of theta_0, a column vector initialised to zeros.
	m = 90
	theta_0 = np.zeros(shape=(m, 1))
	from sklearn.metrics import accuracy_score

	# Take columns 0..3 of the test matrix, one variable per column.
	test_x_1, test_x_2, test_x_3, test_x_4 = (x_test[:, col] for col in range(4))

	# Wrap the first three columns with np.array; test_x_4 stays as the raw slice.
	test_x_1, test_x_2, test_x_3 = (
		np.array(column) for column in (test_x_1, test_x_2, test_x_3)
	)
	import matplotlib.pyplot as plt

	# Turn the recorded cost values into a column vector. Using -1 lets NumPy infer
	# the row count, so the history no longer has to contain exactly 10000 entries
	# (for 10000 entries the resulting shape is still (10000, 1)).
	cost_func = np.array(cost_func).reshape(-1, 1)

	# Plot each cost value against its position in the sequence.
	plt.plot(range(len(cost_func)), cost_func)
	from sklearn.metrics import accuracy_score
	from sklearn.linear_model import LogisticRegression

	# Fit scikit-learn's LogisticRegression with default settings on the training split.
	clf = LogisticRegression()
	clf.fit(x_train, y_train)

	# Predict the test split and print the accuracy against the true labels.
	y_pred = clf.predict(x_test)
	print(accuracy_score(y_true=y_test, y_pred=y_pred))