Skip to content

Instantly share code, notes, and snippets.

@Ruqyai
Last active February 15, 2020 11:31
Show Gist options
  • Save Ruqyai/c3fd668c907add66415694a55c56e38b to your computer and use it in GitHub Desktop.
Save Ruqyai/c3fd668c907add66415694a55c56e38b to your computer and use it in GitHub Desktop.
import pandas as pd # lib for read and deal with dataset
import matplotlib.pylab as plt # lib for visualization
import numpy as np # lib for matrixes and arrays
# Address of the dataset (placeholder -- substitute the real CSV location).
filename = "https:// ... "

# read_csv() loads the CSV found at that address into a DataFrame.
df = pd.read_csv(filename)

# Peek at the first ten rows to see what the data looks like.
# (A bare expression like this only displays output in an interactive session.)
df.head(10)

# Identify missing values: "?" entries are turned into NaN so that pandas
# counts them as missing.
df = df.replace("?", np.nan)
df.head(10)

# Number of missing values in each column.
df.isna().sum()

# Clean the data and handle the missing values here.

# Once the data is clean it can be saved to a new CSV file.
df.to_csv('clean_df.csv')
# Seaborn provides regplot() and heatmap(); the calls below use the alias `sb`,
# so it must be imported before the first plot.
import seaborn as sb

# Gauge how strong or weak the linear relationship is between each feature and
# the label: regplot() draws the scatter plot plus the fitted regression line.
plt.figure(figsize=(25, 5))
for position, column in enumerate(("column 1", "column 2", "column 3"), start=1):
    # One row, three side-by-side panels (equivalent to subplot(131)..(133)).
    plt.subplot(1, 3, position)
    sb.regplot(x=column, y="Label", data=df)

# Pairwise correlation between columns, computed once and reused for the
# heatmap. Only numeric/boolean columns are kept: pandas >= 2.0 no longer drops
# other columns by default in corr() and can raise on text data.
correlations = df.select_dtypes(include=["number", "bool"]).corr()
correlations

# Show the correlation matrix as a heatmap.
plt.figure(figsize=(15, 15))
sb.heatmap(correlations)
# Target vector: the values of the column the model should predict.
label = df['Label'].values
# Feature matrix: the values of the input columns. These are the same
# placeholder column names used in the regression plots -- replace them with
# the real feature columns of the dataset.
features = df[["column 1", "column 2", "column 3"]].values

# Now split the data: 70% for training, 30% held out for testing.
# No random_state is passed, so the split differs from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.30)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Build the structure of the model: ordinary least-squares linear regression.
from sklearn import linear_model

model = linear_model.LinearRegression()

# Train the model on the training split.
model.fit(X_train, y_train)

# Evaluate. For a regressor, score() returns the coefficient of determination
# (R^2), not a classification accuracy; it is at most 1.0 and can be negative,
# so it is reported as R^2 rather than as "accuracy".
train_r2 = model.score(X_train, y_train)
print("R^2 score of the training (%) : ", train_r2 * 100)
test_r2 = model.score(X_test, y_test)
print("R^2 score of the testing (%) : ", test_r2 * 100)

# Predict on the held-out data, then compare one randomly chosen test sample's
# prediction with its real label.
predict = model.predict(X_test)
idx = np.random.choice(X_test.shape[0])
print("Prediction:", predict[idx])
print("Real:", y_test[idx])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment