Skip to content

Instantly share code, notes, and snippets.

@toxdes
Last active March 16, 2019 20:47
Show Gist options
  • Save toxdes/64c7f4009bd6ed5ab580783cbe71c507 to your computer and use it in GitHub Desktop.
Save toxdes/64c7f4009bd6ed5ab580783cbe71c507 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
import numpy as np
from math import *
# print("haha works")
# Fail fast if the dataset file is missing or unreadable: open() raises before
# any training work starts. The context manager closes the handle even if
# readline() fails.
with open('data.data', 'r') as file:
    file.readline()

# --- training the model ---
# Load the comma-separated dataset as floats; entries that genfromtxt cannot
# parse as a float come back as NaN.
data = np.genfromtxt('./data.data', delimiter=',', dtype=float)
# Columns 0-12 are used as the features, column 13 as the label.
X = data[:, :13]
Y = data[:, 13]

# Imputer was deprecated, so use SimpleImputer as the deprecation warning says.
imp = SimpleImputer(missing_values=np.nan, strategy='median')
# Replace NaN feature values with the median of their column.
# NOTE(review): the medians are computed over all rows, including the ones
# that later land in the test split - confirm this is acceptable.
X = imp.fit_transform(X)

# Hold out 15% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.15, random_state=100)
# Y is a single column slice and therefore already one-dimensional, so the
# labels need no ravel() before being handed to the classifier.

# Get the classifier from sklearn and train it on the training split.
clf = GaussianNB()
clf.fit(X_train, y_train)

# Predict on the held-out rows and report the accuracy as a percentage.
result2 = clf.predict(X_test)
score = accuracy_score(y_test, result2) * 100
print('accuracy: {} %'.format(round(score, 3)))
# No attempt is made here to tune the model or improve the accuracy.

# Save the trained classifier; the context manager flushes and closes the file
# even if pickling fails part-way.
# NOTE: only clf is persisted - the fitted imputer is not saved.
with open('trained_clf.pkl', 'wb') as file:
    pickle.dump(clf, file)
print('successfully trained the model lol')
from sklearn.impute import SimpleImputer
import numpy as np
from flask import Flask ,render_template,url_for,request
import pickle
# Load the trained model. The context manager closes the file handle as soon
# as the model has been read.
# NOTE: pickle.load can execute arbitrary code - only load model files that
# come from a trusted source.
with open('trained_clf.pkl', 'rb') as file:
    clf = pickle.load(file)

# Initiate the imputer.
# NOTE(review): this imputer is never fitted anywhere in this script, and
# scikit-learn raises NotFittedError when transform() is called on an unfitted
# SimpleImputer - confirm whether it is still needed.
imp = SimpleImputer(missing_values=np.nan, strategy='median')

# Initiate the server.
app = Flask(__name__)
@app.route('/')
def home():
    """Render and return the ``home.html`` template for the site root."""
    page = render_template('home.html')
    return page
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the 13 integer form fields ``comment1`` .. ``comment13``.

    Returns:
        The rendered ``results.html`` template, with the classifier output
        passed as ``prediction``, when every field is present and parses as
        an integer; otherwise the text ``'Invalid values.'`` with HTTP
        status 400.
    """
    # The route is registered for POST only, so no request.method check is
    # needed here.
    try:
        values = [
            int(request.form['comment{}'.format(index)])
            for index in range(1, 14)
        ]
    except (KeyError, ValueError) as e:
        # KeyError: a field is missing from the form; ValueError: a field is
        # not an integer. Always return a response - a view that returns None
        # makes Flask fail with a server error.
        print('Invalid values.')
        print(e)
        return 'Invalid values.', 400

    # One sample (row) with 13 features (columns). Every value was parsed by
    # int(), so there is no NaN to impute; the module-level imputer is also
    # never fitted, so calling its transform() here would raise.
    features = np.array(values, dtype=float).reshape(1, -1)
    my_prediction = clf.predict(features)
    return render_template('results.html', prediction=my_prediction)
# Start Flask's built-in server (debug mode on) only when this file is run
# directly as a script, not when it is imported.
if __name__ == '__main__':
    app.run(debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment