Skip to content

Instantly share code, notes, and snippets.

# Check data for skewness
# Draws one distribution plot per (key, values) pair of `data` on a 2 x 7 grid.
# `data` is defined outside this snippet; presumably a pandas DataFrame or a
# dict of columns — TODO confirm.
fig, axs = plt.subplots(ncols=7, nrows=2, figsize=(20, 10))
axs = axs.flatten()  # 2-D grid of axes -> flat sequence so it can be indexed by position
# enumerate replaces the hand-maintained counter; indexing axs still raises
# IndexError if `data` has more entries than the 14 available axes.
for index, (_name, v) in enumerate(data.items()):
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # sns.histplot(v, kde=True, ax=...) is the documented replacement —
    # confirm the installed seaborn version before switching.
    sns.distplot(v, ax=axs[index], color="green")
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=5.0)
#Analyse the data
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file
from pandas import read_csv
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
# Column names applied to the data file, in file order.
header = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
# The file is read as whitespace-delimited with no header row of its own,
# so the names above are supplied explicitly.
df = read_csv(
    'housing.csv',
    names=header,
    header=None,
    delimiter=r"\s+",
)
#Load & Format CSV, Set Train & Test Split
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
# Path of the CSV file to load.
file = "housing.csv"
# Names assigned to the columns, in file order.
header = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]
# Parse the file as whitespace-delimited with no header row into a DataFrame.
df = pd.read_csv(
    file,
    names=header,
    header=None,
    delimiter=r"\s+",
)
# Show the first rows as a quick sanity check of the load.
print(df.head())
# 10.c Evaluate Model Performance after Box-Cox Transformation
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from scipy import stats
import seaborn as sns
from scipy.special import boxcox, inv_boxcox
# 7.c. Identify Skewed Data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
class predit:
def bestFitLine(self):
size=np.array([1491,1526,1533,1680,1680,1869,1890,1920,1936,1950,1953,2016,2117,3072,3182,3196]).reshape(-1,1)
#7.b. Identify Outliers in a dataset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
class predit:
def bestFitLine(self):
size=np.array([1300,1491,1526,1533,1680,1680,1869,1890,1920,1936,1950,1953,2016,2117,3072,3182,3196,3842,5925,7879,9000])
#2. Code a Machine Learning program from scratch - House Price Prediction
#2.a Predict prices for a list of houses
# Sizes to report on: a list of sizes rather than a single size.
newHouseSize = np.array([2268, 2280, 2628, 2645, 3000])
# `price` is defined outside this snippet; presumably the predicted prices
# aligned one-to-one with newHouseSize — TODO confirm against the caller.
# zip pairs each size with its price and stops at the shorter sequence.
for size, cost in zip(newHouseSize, price):
    print("Price of {} sq feet house is: {}".format(size, cost))  # format the output
@ShrashtiSinghal
ShrashtiSinghal / 5.py
Last active August 8, 2020 11:45
Medium article 1
import numpy as np
from sklearn.linear_model import LinearRegression
class predit:
def bestFitLine(self,data):
size=np.array([500,650,700,780,900,1100,1150,2000,2200,2500]).reshape(-1,1)
price=np.array([1000,1500,1600,1770,2200,3000,3500,4400,4600,6000]).reshape(-1,1)
regressionLine=LinearRegression().fit(size,price)
pred=regressionLine.predict(size)
@ShrashtiSinghal
ShrashtiSinghal / 4.py
Created July 19, 2020 17:10
Medium article 1
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
class predit:
def bestFitLine(self,data):
size=np.array([500,650,700,780,900,1100,1150,2000,2200,2500]).reshape(-1,1)
price=np.array([1000,1500,1600,1770,2200,3000,3500,4400,4600,6000]).reshape(-1,1)
regressionLine=LinearRegression().fit(size,price)
@ShrashtiSinghal
ShrashtiSinghal / 3.py
Created July 19, 2020 17:08
Medium article 1
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
class predit:
def bestFitLine(self):
size=np.array([500,650,700,780,900,1100,1150,2000,2200,2500]).reshape(-1,1)
price=np.array([1000,1500,1600,1770,2200,3000,3500,4400,4600,6000]).reshape(-1,1)
regressionLine=LinearRegression().fit(size,price)