# Let The Data Confess (letthedataconfess)

## applying SVC model
from sklearn.svm import SVC

# Build a support-vector classifier with default hyperparameters and fit it
# on the training features/labels; fit() hands back the fitted estimator.
clf = SVC().fit(x_train_tfidf, y_train)

# Predict labels for the test features and print them next to y_test.
y_pred = clf.predict(x_test_tfidf)
print(y_pred, y_test)

## model evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall in the classification report.
print("Accuracy score:", accuracy_score(y_test, y_pred))
# Multi-line results start on their own line so rows and columns stay aligned.
print("Confusion matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification report:", classification_report(y_test, y_pred), sep="\n")

## prediction
# Vectorise one unseen review with the `tfidf` object defined elsewhere and
# convert the result to a dense array before passing it to the classifier.
message = tfidf.transform(
    ["Congratulations on building your first Sentiment Analysis model! You're going great!"]
).toarray()
# Notebook-style bare expression: the predicted label for that single review.
clf.predict(message)[0]

## stop words removal
nltk.download('stopwords')
stopword_list = stopwords.words('english')
# 'no' and 'not' are taken out of the stop-word list so they stay in the text
# (presumably because negations matter for sentiment — confirm).
stopword_list.remove('no')
stopword_list.remove('not')

# A set gives O(1) membership tests per token; the list itself is left intact
# for any later use.
stopword_set = set(stopword_list)


def _strip_stopwords(text):
    """Return *text* with its whitespace-separated stop words removed."""
    return " ".join(word for word in text.split() if word not in stopword_set)


df['review'] = df['review'].apply(_strip_stopwords)
# Notebook-style bare expression: show the cleaned review stored under label 5.
df['review'][5]

## scatter plot
import matplotlib.pyplot as plt

# Paired sample coordinates: x[i] is drawn against y[i].
x = [5, 7, 8, 10, 3, 17, 4, 9, 7, 9, 8, 9, 6]
y = [40, 36, 47, 48, 120, 46, 67, 48, 31, 134, 50, 35, 56]

plt.scatter(x, y)
plt.show()

## boxplot
import matplotlib.pyplot as plt

# Sample values summarised by the box plot.
data = [
    20, 25, 27, 75, 40, 67, 62, 75, 78, 71, 32,
    82, 127, 140, 78, 67, 132, 82, 87, 66, 56, 52,
]

# vert=False lays the box out horizontally.
plt.boxplot(data, vert=False)
plt.show()


## Z score
import numpy as np

# Placeholder for results, e.g. ``outliers = detect_outliers(dataset)``.
outliers = []
dataset = [
    11, 10, 12, 14, 12, 15, 14, 13, 15, 102, 12, 14, 17, 19, 107, 10, 13,
    12, 14, 12, 108, 12, 11, 14, 13, 15, 10, 15, 12, 10, 14, 13, 15, 10,
]


def detect_outliers(data):
    """Return the values in *data* whose absolute z-score exceeds 3.

    A value's z-score is its distance from the mean of *data*, measured in
    population standard deviations (``np.std`` with its default ``ddof=0``).

    Args:
        data: Sequence of numbers to screen.

    Returns:
        list: The outlying values in their original order. Empty when
        *data* is empty or every value is identical (zero spread).
    """
    threshold = 3
    if len(data) == 0:
        return []

    mean = np.mean(data)
    std = np.std(data)
    if std == 0:
        # No spread: z-scores are undefined, so nothing can be an outlier.
        return []

    # Iterate over the argument (not the module-level ``dataset``) so the
    # function works for any input.
    return [value for value in data if abs((value - mean) / std) > threshold]

## interquartile range
dataset = sorted([
    11, 10, 12, 14, 12, 15, 14, 13, 15, 102, 12, 14, 17, 19, 107, 10, 13,
    12, 14, 12, 108, 12, 11, 14, 13, 15, 10, 15, 12, 10, 14, 13, 15, 10,
])

# First and third quartiles; the bounds sit 1.5 * IQR outside them.
q1, q3 = np.percentile(dataset, [25, 75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
print(f'lower_bound={lower_bound},upper_bound={upper_bound}')

outliers_pt = []

## installation of openCV
# IPython/Jupyter shell escape (not plain Python): runs pip in a subshell to
# install the `opencv-python` package.
!pip install opencv-python

## Image reading.py
# cv2.imread does not raise on a missing or unreadable file; it returns None.
img = cv2.imread('Zebra.jpg')
if img is None:
    raise FileNotFoundError("Could not read image 'Zebra.jpg'")
plt.figure(figsize=(10, 6))
# OpenCV loads colour images as BGR while Matplotlib expects RGB, so a
# converted copy is displayed and `img` stays BGR for further OpenCV calls.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
	# NOTE(review): this tab-indented run appears to repeat earlier snippets
	# of the file without their headings — confirm it is intentional.
	from sklearn.svm import SVC

	# Build a support-vector classifier with default hyperparameters and fit
	# it on the training features/labels; fit() hands back the estimator.
	clf = SVC().fit(x_train_tfidf, y_train)

	# Predict labels for the test features and print them next to y_test.
	y_pred = clf.predict(x_test_tfidf)
	print(y_pred, y_test)
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

	# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
	# symmetric, but swapping the arguments transposes the confusion matrix
	# and exchanges precision with recall in the classification report.
	print("Accuracy score:", accuracy_score(y_test, y_pred))
	# Multi-line results start on their own line so rows/columns stay aligned.
	print("Confusion matrix:", confusion_matrix(y_test, y_pred), sep="\n")
	print("Classification report:", classification_report(y_test, y_pred), sep="\n")
	# Vectorise one unseen review with the `tfidf` object defined elsewhere
	# and convert the result to a dense array before classifying it.
	message = tfidf.transform(
		["Congratulations on building your first Sentiment Analysis model! You're going great!"]
	).toarray()
	# Notebook-style bare expression: the predicted label for that review.
	clf.predict(message)[0]
	nltk.download('stopwords')
	stopword_list = stopwords.words('english')
	# 'no' and 'not' are taken out of the stop-word list so they stay in the
	# text (presumably because negations matter for sentiment — confirm).
	stopword_list.remove('no')
	stopword_list.remove('not')

	# A set gives O(1) membership tests per token; the list is left intact.
	stopword_set = set(stopword_list)

	def _strip_stopwords(text):
		"""Return *text* with its whitespace-separated stop words removed."""
		return " ".join(word for word in text.split() if word not in stopword_set)

	df['review'] = df['review'].apply(_strip_stopwords)
	# Notebook-style bare expression: show the cleaned review under label 5.
	df['review'][5]
	import matplotlib.pyplot as plt

	# Paired sample coordinates: x[i] is drawn against y[i].
	x = [5, 7, 8, 10, 3, 17, 4, 9, 7, 9, 8, 9, 6]
	y = [40, 36, 47, 48, 120, 46, 67, 48, 31, 134, 50, 35, 56]

	plt.scatter(x, y)
	plt.show()
	import matplotlib.pyplot as plt

	# Sample values summarised by the box plot.
	data = [
		20, 25, 27, 75, 40, 67, 62, 75, 78, 71, 32,
		82, 127, 140, 78, 67, 132, 82, 87, 66, 56, 52,
	]

	# vert=False lays the box out horizontally.
	plt.boxplot(data, vert=False)
	plt.show()
	import numpy as np

	# Placeholder for results, e.g. ``outliers = detect_outliers(dataset)``.
	outliers = []
	dataset = [
		11, 10, 12, 14, 12, 15, 14, 13, 15, 102, 12, 14, 17, 19, 107, 10, 13,
		12, 14, 12, 108, 12, 11, 14, 13, 15, 10, 15, 12, 10, 14, 13, 15, 10,
	]

	def detect_outliers(data):
		"""Return the values in *data* whose absolute z-score exceeds 3.

		A value's z-score is its distance from the mean of *data*, measured
		in population standard deviations (``np.std`` with default ``ddof=0``).

		Args:
			data: Sequence of numbers to screen.

		Returns:
			list: The outlying values in their original order. Empty when
			*data* is empty or every value is identical (zero spread).
		"""
		threshold = 3
		if len(data) == 0:
			return []

		mean = np.mean(data)
		std = np.std(data)
		if std == 0:
			# No spread: z-scores are undefined, so nothing is an outlier.
			return []

		# Iterate over the argument (not the outer ``dataset``) so the
		# function works for any input.
		return [value for value in data if abs((value - mean) / std) > threshold]
	dataset = sorted([
		11, 10, 12, 14, 12, 15, 14, 13, 15, 102, 12, 14, 17, 19, 107, 10, 13,
		12, 14, 12, 108, 12, 11, 14, 13, 15, 10, 15, 12, 10, 14, 13, 15, 10,
	])

	# First and third quartiles; the bounds sit 1.5 * IQR outside them.
	q1, q3 = np.percentile(dataset, [25, 75])
	iqr = q3 - q1
	lower_bound = q1 - 1.5 * iqr
	upper_bound = q3 + 1.5 * iqr
	print(f'lower_bound={lower_bound},upper_bound={upper_bound}')

	outliers_pt = []
	# cv2.imread does not raise on a missing/unreadable file; it returns None.
	img = cv2.imread('Zebra.jpg')
	if img is None:
		raise FileNotFoundError("Could not read image 'Zebra.jpg'")
	plt.figure(figsize=(10, 6))
	# OpenCV loads colour images as BGR while Matplotlib expects RGB, so a
	# converted copy is displayed and `img` stays BGR for further OpenCV calls.
	plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))