Skip to content

Instantly share code, notes, and snippets.

@martingaido
Created August 25, 2020 12:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martingaido/fe74fd2f0951733d132a852f2293f3f3 to your computer and use it in GitHub Desktop.
Save martingaido/fe74fd2f0951733d132a852f2293f3f3 to your computer and use it in GitHub Desktop.
Predict House prices using linear regression
## Predict house rent amounts using linear regression
## Dataset: https://www.kaggle.com/rubenssjr/brasilian-houses-to-rent
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing, linear_model
from sklearn.model_selection import train_test_split
### Loading Data ###
print('-' * 30, ' Importing Data ...', '-' * 30, sep='\n')
# Read the CSV (comma-separated, the pandas default) and keep only the
# columns the rest of the script works with.
data = pd.read_csv('houses_to_rent.csv')[[
    'city',
    'rooms',
    'bathroom',
    'parking spaces',
    'fire insurance',
    'furniture',
    'rent amount',
]]
print(data.head())
### Process Data ###
# Null handling runs first: the currency parsing and the label encoding below
# operate on every value of their column, so a missing value would make them
# fail before the rows could be dropped.
print('-' * 30, ' Checking Null Data...', '-' * 30, sep='\n')
print(data.isnull().sum())  # null count per column before cleaning
data = data.dropna()  # drop every row that contains a null value
print(data.isnull().sum())  # null count per column after cleaning

print('-' * 30, ' Processing Data ...', '-' * 30, sep='\n')
for money_column in ('rent amount', 'fire insurance'):
    # Skip the two-character 'R$' prefix and remove the comma separators
    # so the remaining digits can be converted to an integer.
    data[money_column] = data[money_column].map(lambda i: int(i[2:].replace(',', '')))
le = preprocessing.LabelEncoder()
data['furniture'] = le.fit_transform(data['furniture'])  # encode the categories as integer labels
print(data.head())

print('-' * 30, ' Header Information ', '-' * 30, sep='\n')
print(data.head())
### Split Data ###
print('-' * 30, ' Split Data ', '-' * 30, sep='\n')
# Use `columns=` rather than a positional axis: pandas 2.x no longer accepts
# the axis as the second positional argument of DataFrame.drop.
x = np.array(data.drop(columns=['rent amount']))  # features: every column except the target
y = np.array(data['rent amount'])  # target: the rent amount
print('X', x.shape)  # instances, features
print('Y', y.shape)
# 20% testing, 80% training; the fixed random_state keeps the split reproducible.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.2, random_state=100)
print('XTrain ', xTrain.shape)
print('XTest ', xTest.shape)
### Training ###
print('-' * 30, ' Training Model ... ', '-' * 30, sep='\n')
# Fit a linear regression on the training split (fit() returns the estimator).
model = linear_model.LinearRegression().fit(xTrain, yTrain)
# NOTE: for a regressor, score() is the coefficient of determination (R^2)
# on the held-out split; the script reports it under the label 'Accuracy'.
accuracy = model.score(xTest, yTest)
print('Coefficient: ', model.coef_)
print('Intercept: ', model.intercept_)
accuracy_percent = round(accuracy * 100, 3)
print('Accuracy: ', accuracy_percent, '%')
### Evaluation ###
print('-' * 30, ' Manual Testing ... ', '-' * 30, sep='\n')
testVals = model.predict(xTest)  # predictions for the held-out test rows
print(f'Values: {testVals.shape}')
# Collect the per-row error (actual - predicted) and print each comparison.
error = []
for actual, predicted in zip(yTest, testVals):
    residual = actual - predicted
    error.append(residual)
    print(f'Original Value: {actual} - Prediction Value: {int(predicted)} - Error: {int(residual)}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment