Last active
February 15, 2020 11:31
-
-
Save Ruqyai/c3fd668c907add66415694a55c56e38b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd # lib for read and deal with dataset | |
import matplotlib.pylab as plt # lib for visualization | |
import numpy as np # lib for matrixes and arrays | |
# URL of the dataset (placeholder -- replace it with the real CSV address).
filename = "https:// ... "

# Load the data from the web address into a DataFrame.
df = pd.read_csv(filename)

# Show the first ten rows to see what the data set looks like.
# print() is needed here: outside a notebook the value of a bare
# expression such as `df.head(10)` is silently discarded.
print(df.head(10))

# Identify missing values: turn every "?" marker into NaN so that pandas
# treats those cells as missing.
df.replace("?", np.nan, inplace=True)
print(df.head(10))

# Number of missing values in each column.
print(df.isnull().sum())

# TODO: clean the data and handle the missing values here; nothing is
# actually cleaned yet, so the file written below still contains the NaNs.

# Save the DataFrame to a new CSV file.
df.to_csv('clean_df.csv')
# NOTE(review): `sb` is not imported anywhere in the visible part of this
# file; it is presumably seaborn (`import seaborn as sb`) -- confirm and add
# the import, otherwise every `sb.` call below raises NameError.

# Gauge how strong or weak each linear relationship with the label is:
# regplot draws the scatter plot plus the fitted regression line.
plotted_columns = ("column 1", "column 2", "column 3")
plt.figure(figsize=(25, 5))
for position, column in enumerate(plotted_columns, start=1):
    # One row, three panels: equivalent to subplot(131) .. subplot(133).
    plt.subplot(1, 3, position)
    sb.regplot(x=column, y="Label", data=df)

# Pairwise correlation, computed once and reused for the heatmap.
# Only numeric columns are kept: older pandas dropped the others silently,
# while pandas >= 2.0 raises when a text column cannot be converted.
correlations = df.select_dtypes(include="number").corr()
print(correlations)

# Show the correlation matrix as a heatmap.
# NOTE(review): no plt.show() is called, so when this runs as a plain script
# the figures are created but may never be displayed -- confirm the intended
# environment (notebook vs. script).
plt.figure(figsize=(15, 15))
sb.heatmap(correlations)
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Target vector: the values of the "Label" column.
label = df['Label'].values

# Feature matrix. The original line was an unfinished placeholder
# (`features = # the values of columns`), which is a syntax error.
# NOTE(review): assumes the predictors are the three columns that this file
# plots against "Label" -- adjust the list to the real feature columns.
feature_columns = ["column 1", "column 2", "column 3"]
features = df[feature_columns].values

# Split the data: 70 % for training, 30 % held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, test_size=0.30
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Build the model structure and train it.
model = linear_model.LinearRegression()
model.fit(X_train, y_train)

# Evaluate. For a LinearRegression, score() returns the coefficient of
# determination (R^2), not a classification accuracy; the printed labels
# are kept unchanged so the output stays the same.
train_score = model.score(X_train, y_train)
print("Accuracy of the training : ", train_score * 100)
test_score = model.score(X_test, y_test)
print("Accuracy of the testing : ", test_score * 100)

# Predict on the test set and compare one randomly chosen sample with its
# true value.
predict = model.predict(X_test)
idx = np.random.choice(X_test.shape[0])
print("Prediction:", predict[idx])
print("Reel", y_test[idx])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment