Ruqyai/ML.py

## ML.py


import pandas as pd                    # lib for reading and handling the dataset
import matplotlib.pylab as plt         # lib for visualization
import numpy as np                     # lib for matrices and arrays
import seaborn as sb                   # lib for statistical plots (regplot, heatmap)
from sklearn import linear_model
from sklearn.model_selection import train_test_split


# URL of the dataset (placeholder: replace it with the real CSV address).
filename = "https:// ... "
# Use the pandas function read_csv() to load the data from the web address.
df = pd.read_csv(filename)

# Show the first ten rows to see what the data set looks like.
# print() is needed: outside a notebook a bare expression is silently discarded.
print(df.head(10))

# Identify missing values: treat "?" as the missing-value marker, turn it
# into NaN, then count the NaN entries of every column.
df.replace("?", np.nan, inplace=True)
print(df.head(10))
print(df.isnull().sum())
# TODO: clean the data and handle the missing values here (drop or impute).
# NOTE(review): the NaN values are still present after this point, so the
# model columns must be cleaned before fitting -- confirm the strategy.

# After cleaning the data we can save it in a new CSV file.
df.to_csv('clean_df.csv')


# Columns used as model inputs. These are the template's placeholder names
# (the same columns that are plotted against "Label" below); replace them
# with the real column names of the dataset.
feature_columns = ["column 1", "column 2", "column 3"]

# To see how strong or weak the linear relationships are:
# "regplot" draws the scatter plot plus the fitted regression line, one
# subplot per feature, all in a single row (subplot codes 131, 132, 133).
plt.figure(figsize=(25, 5))
for position, column in enumerate(feature_columns, start=131):
    plt.subplot(position)
    sb.regplot(x=column, y="Label", data=df)


# Correlation matrix, computed once and reused for the heatmap.
# Non-numeric columns are excluded explicitly because pandas >= 2.0 raises
# on them instead of silently dropping them.
correlation = df.select_dtypes(include="number").corr()
print(correlation)

# Show it as a heatmap.
plt.figure(figsize=(15, 15))
sb.heatmap(correlation)

# Target vector and feature matrix as NumPy arrays.
label = df['Label'].values
features = df[feature_columns].values

# Now split the data: 70% for training, 30% for testing.
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.30)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Build the structure of the model.
model = linear_model.LinearRegression()

# Train the model.
model.fit(X_train, y_train)

# Evaluate.
# NOTE: for LinearRegression, score() returns the coefficient of
# determination (R^2), not a classification accuracy; it can be negative.
# The printed labels are kept as they were so the output stays unchanged.
Accuracy = model.score(X_train, y_train)
print("Accuracy of the training : ", Accuracy*100)

accuracy = model.score(X_test, y_test)
print("Accuracy of the testing : ", accuracy*100)


# Predict on the test set and compare one randomly chosen sample with its
# true value.
predict = model.predict(X_test)
idx = np.random.choice(X_test.shape[0])
print("Prediction:", predict[idx])
print("Reel", y_test[idx])


	import pandas as pd # lib for reading and handling the dataset
	import matplotlib.pylab as plt # lib for visualization
	import numpy as np # lib for matrices and arrays
	import seaborn as sb # lib for statistical plots (regplot, heatmap)
	from sklearn import linear_model
	from sklearn.model_selection import train_test_split


	# URL of the dataset (placeholder: replace it with the real CSV address).
	filename = "https:// ... "
	# Use the pandas function read_csv() to load the data from the web address.
	df = pd.read_csv(filename)

	# Show the first ten rows to see what the data set looks like.
	# print() is needed: outside a notebook a bare expression is silently discarded.
	print(df.head(10))

	# Identify missing values: treat "?" as the missing-value marker, turn it
	# into NaN, then count the NaN entries of every column.
	df.replace("?", np.nan, inplace=True)
	print(df.head(10))
	print(df.isnull().sum())
	# TODO: clean the data and handle the missing values here (drop or impute).
	# NOTE(review): the NaN values are still present after this point, so the
	# model columns must be cleaned before fitting -- confirm the strategy.

	# After cleaning the data we can save it in a new CSV file.
	df.to_csv('clean_df.csv')


	# Columns used as model inputs. These are the template's placeholder names
	# (the same columns that are plotted against "Label" below); replace them
	# with the real column names of the dataset.
	feature_columns = ["column 1", "column 2", "column 3"]

	# To see how strong or weak the linear relationships are:
	# "regplot" draws the scatter plot plus the fitted regression line, one
	# subplot per feature, all in a single row (subplot codes 131, 132, 133).
	plt.figure(figsize=(25, 5))
	for position, column in enumerate(feature_columns, start=131):
		plt.subplot(position)
		sb.regplot(x=column, y="Label", data=df)


	# Correlation matrix, computed once and reused for the heatmap.
	# Non-numeric columns are excluded explicitly because pandas >= 2.0 raises
	# on them instead of silently dropping them.
	correlation = df.select_dtypes(include="number").corr()
	print(correlation)

	# Show it as a heatmap.
	plt.figure(figsize=(15, 15))
	sb.heatmap(correlation)

	# Target vector and feature matrix as NumPy arrays.
	label = df['Label'].values
	features = df[feature_columns].values

	# Now split the data: 70% for training, 30% for testing.
	X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.30)
	print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

	# Build the structure of the model.
	model = linear_model.LinearRegression()

	# Train the model.
	model.fit(X_train, y_train)

	# Evaluate.
	# NOTE: for LinearRegression, score() returns the coefficient of
	# determination (R^2), not a classification accuracy; it can be negative.
	# The printed labels are kept as they were so the output stays unchanged.
	Accuracy = model.score(X_train, y_train)
	print("Accuracy of the training : ", Accuracy*100)

	accuracy = model.score(X_test, y_test)
	print("Accuracy of the testing : ", accuracy*100)


	# Predict on the test set and compare one randomly chosen sample with its
	# true value.
	predict = model.predict(X_test)
	idx = np.random.choice(X_test.shape[0])
	print("Prediction:", predict[idx])
	print("Reel", y_test[idx])