Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
# Compare the skewness of SalePrice before and after a natural-log transform.
print('Skewness = ', train['SalePrice'].skew())
target = np.log(train['SalePrice'])
print('Skewness = ', target.skew())
# sns.distplot is deprecated and scheduled for removal from seaborn; this is
# the histplot equivalent (density-normalised histogram with a KDE overlay).
# The trailing semicolon only suppresses the notebook's echo of the Axes object.
sns.histplot(target, kde=True, stat="density", kde_kws=dict(cut=3));
# Pairwise correlations between the numeric columns only: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# on string-valued columns instead.
corr = train.select_dtypes(include='number').corr()
# Ten largest correlations with SalePrice (SalePrice itself included).
corr['SalePrice'].sort_values(ascending=False).head(10)
# Mean SalePrice for each OverallQual level. The string 'mean' is used
# instead of np.mean because passing the NumPy callable triggers a
# FutureWarning in recent pandas versions; the computed result is the same.
table = pd.pivot_table(
    train,
    index='OverallQual',
    values='SalePrice',
    aggfunc='mean',
)
table
# Scatter plot of SalePrice against GrLivArea.
living_area = train['GrLivArea']
sale_price = train['SalePrice']
plt.scatter(living_area, sale_price)
plt.xlabel('GrLivArea')
plt.ylabel('Sale Price')
plt.show()
# Drop outlier rows (GrLivArea >= 4500). The explicit .copy() makes `train`
# an independent frame, so the later column edits do not operate on a slice
# of the original data (avoids SettingWithCopy warnings / lost writes).
train = train[train['GrLivArea'] < 4500].copy()
# Number of training rows left after removing the outliers.
ntrain = train.shape[0]
# Save the log transform of the target; this must happen before SalePrice
# is dropped below.
target = np.log(train['SalePrice'])
# Drop Id and SalePrice from the train dataframe.
train = train.drop(columns=['Id', 'SalePrice'])
#store test Id
# Missing-value count per column; show the 20 columns with the most nulls.
null_counts = train.isnull().sum()
null_counts.sort_values(ascending=False).head(20)
# Ordinal features: map quality labels to integer ranks, with NaN -> 0.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on train[col]: that chained in-place call acts
# on a temporary object and does not update `train` under pandas
# Copy-on-Write.
# NA means no Pool
train['PoolQC'] = train['PoolQC'].replace(
    ['Ex', 'Gd', 'TA', 'Fa', np.nan], [4, 3, 2, 1, 0]
)
# NA means no fence
train['Fence'] = train['Fence'].replace(
    ['GdPrv', 'MnPrv', 'GdWo', 'MnWw', np.nan], [4, 3, 2, 1, 0]
)
# NA means no fireplace
train['FireplaceQu'] = train['FireplaceQu'].replace(
    ['Ex', 'Gd', 'TA', 'Fa', 'Po', np.nan], [5, 4, 3, 2, 1, 0]
)
# Ordinal garage features: quality labels -> integer ranks, NaN -> 0.
# Results are assigned back to the column because chained in-place calls
# (train[col].replace(..., inplace=True)) do not update `train` under
# pandas Copy-on-Write.
for col in ['GarageCond', 'GarageQual']:
    train[col] = train[col].replace(
        ['Ex', 'Gd', 'TA', 'Fa', 'Po', np.nan], [5, 4, 3, 2, 1, 0]
    )
# Nominal garage features: fill missing values with the literal 'None'.
for col in ['GarageFinish', 'GarageType']:
    train[col] = train[col].fillna('None')
# Numerical features
for i in ['GarageYrBlt','GarageCars','GarageArea']: