Skip to content

Instantly share code, notes, and snippets.

View grohith327's full-sized avatar

Rohith Gandhi G grohith327

View GitHub Profile
import numpy as np
from sklearn.model_selection import train_test_split
# One-hot encode the labels in `y`: a value equal to 0 becomes [1, 0],
# every other value becomes [0, 1].
Y = np.array([[1, 0] if val == 0 else [0, 1] for val in y])
import pandas as pd
import numpy as np
# Load the dataset; column 'v1' is used as the target and 'v2' as the text.
file_path = '/Users/rohith/Documents/Datasets/SMS_Spam/spam.csv'
df = pd.read_csv(file_path)
out = df['v1']
text = df['v2']

# Encode the target column as integer class codes. `label` used to be left as
# an empty list, which made shuffle()/train_test_split() fail because `text`
# and `label` had different lengths.
# NOTE(review): presumably 'v1' holds the class names (e.g. ham/spam); codes are
# assigned in sorted order of the distinct values - confirm this is the
# encoding the rest of the pipeline expects.
label = out.astype('category').cat.codes

from sklearn.utils import shuffle
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `sklearn.model_selection` provides the same `train_test_split`.
from sklearn.model_selection import train_test_split

# Shuffle text and labels together, then keep 90% of the rows for training.
# The split outputs are assigned directly, so no placeholder lists are needed.
text, label = shuffle(text, label)
x_train, x_test, y_train, y_test = train_test_split(text, label, train_size=0.9)
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
# Stage 1 - token counts. The vocabulary is fitted on the training text only
# and then reused unchanged for the test text.
count_vect = CountVectorizer(decode_error='ignore')
x_train_count = count_vect.fit_transform(x_train)
x_test_count = count_vect.transform(x_test)

# Stage 2 - TF-IDF weighting. Fitted on the training counts, applied to both
# the training and the test counts.
tfidf_trans = TfidfTransformer()
x_train_tfidf = tfidf_trans.fit_transform(x_train_count)
x_test_tfidf = tfidf_trans.transform(x_test_count)
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
# Train a 200-tree gradient-boosted classifier on the TF-IDF features
# (fit() returns the fitted estimator), then report accuracy on the test split.
clf = XGBClassifier(n_estimators=200).fit(x_train_tfidf, y_train)
y_pred = clf.predict(x_test_tfidf)
print(accuracy_score(y_test, y_pred))
from sklearn.utils import shuffle
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `sklearn.model_selection` provides the same `train_test_split`.
from sklearn.model_selection import train_test_split
import numpy as np

# Shuffle features and targets with the same permutation so rows stay paired.
X, Y = shuffle(X, Y)

# Empty containers for the train/test partitions.
# NOTE(review): nothing visible here fills them - presumably the code that
# follows this snippet does; confirm.
x_train = []
y_train = []
x_test = []
y_test = []
## Logistic Regression
import numpy as np
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Args:
        x: A number or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the
        same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) is always in (0, 1], so neither branch below can overflow,
    # unlike a direct exp(-x) for large negative x.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array to a scalar and returns any
    # higher-dimensional array unchanged.
    return result[()]
# NOTE(review): presumably `m` is a sample/parameter count and `alpha` the
# learning rate for the logistic-regression fit - confirm against their use.
m, alpha = 90, 0.0001
# Start from an all-zero column vector of shape (m, 1).
theta_0 = np.zeros(shape=(m, 1))
from sklearn.metrics import accuracy_score
# Split the first four columns of x_test into separate variables.
# Columns 0-2 are additionally passed through np.array().
test_x_1 = np.array(x_test[:, 0])
test_x_2 = np.array(x_test[:, 1])
test_x_3 = np.array(x_test[:, 2])
# NOTE(review): unlike the three columns above, this one is not wrapped in
# np.array() in the visible code - confirm whether that is intentional.
test_x_4 = x_test[:, 3]
import matplotlib.pyplot as plt
# Turn the recorded cost values into a column vector. Using -1 lets NumPy
# infer the row count, so this no longer breaks when the number of recorded
# values differs from the previously hard-coded 10000.
cost_func = np.array(cost_func).reshape(-1, 1)
# Plot each cost value against its position in the sequence.
plt.plot(range(len(cost_func)), cost_func)
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
# Baseline: scikit-learn's LogisticRegression with default settings
# (fit() returns the fitted estimator), scored by accuracy on the test split.
clf = LogisticRegression().fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))