Last active
June 25, 2016 15:44
-
-
Save nqbao/f3f531751bd7f01d991401b58a7f7a3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Price prediction with linear regression.
# We fit a linear regression to predict house prices,
# then use RMSE to evaluate the model.
library(arimo)

# Handle to the 'housing' dataset.
# NOTE(review): presumably a distributed data frame (DDF) - confirm against
# the arimo package documentation.
housing_ddf <- arimo.getDDF('housing')

# Materialise every row locally by asking head() for nrow() rows.
# The original referenced an undefined `ddf` here; the handle created above
# is `housing_ddf`.
housing <- head(housing_ddf, nrow(housing_ddf))
# Randomly split a data frame into a training set and a test set.
#
# Args:
#   dataframe: data frame (or matrix) whose rows are to be partitioned.
#   ratio:     fraction of rows, in [0, 1], assigned to the training set;
#              the row count is floor(ratio * nrow(dataframe)).
#   seed:      optional RNG seed; when non-NULL, set.seed(seed) is called
#              first so the split is reproducible.
#
# Returns:
#   A list with elements `train` and `test`. Together they contain every row
#   of `dataframe` exactly once; `test` keeps the original row order.
splitdf <- function(dataframe, ratio = 0.8, seed = NULL) {
  stopifnot(
    is.numeric(ratio), length(ratio) == 1L, !is.na(ratio),
    ratio >= 0, ratio <= 1
  )
  if (!is.null(seed)) set.seed(seed)

  n_rows <- nrow(dataframe)
  # sample.int(n, k) draws the same indices as sample(1:n, k) for a given
  # seed, without the 1:0 pitfall on empty input.
  train_rows <- sample.int(n_rows, size = floor(ratio * n_rows))
  # Select the complement positively: negative indexing with an empty vector
  # (`x[-integer(0), ]`) would return zero rows instead of all rows.
  test_rows <- setdiff(seq_len(n_rows), train_rows)

  # drop = FALSE keeps single-column inputs as data frames.
  list(
    train = dataframe[train_rows, , drop = FALSE],
    test = dataframe[test_rows, , drop = FALSE]
  )
}
# Root mean squared error between two numeric vectors.
#
# Args:
#   y0: numeric vector (the script passes the observed prices here).
#   y1: numeric vector (the script passes the predicted prices here).
#
# Returns:
#   The square root of the mean of the element-wise squared differences.
rmse <- function(y0, y1) {
  squared_error <- (y0 - y1)^2
  sqrt(mean(squared_error))
}
# Partition the housing data with splitdf()'s default ratio (no seed, so the
# split differs between runs).
split <- splitdf(housing)

# Ordinary least squares fit of price on four predictors, training rows only.
fit <- lm(
  price ~ lotsize + bedrooms + bathrms + stories,
  data = split$train
)

# Inspect the fitted model.
summary(fit)
fitted(fit)     # predicted values on the training rows
residuals(fit)  # residuals (actual - predicted)

# Score the held-out rows and report the prediction error.
predictedPrice <- predict(fit, split$test)
rmse(split$test$price, predictedPrice)
# R-squared (coefficient of determination).
#
# Args:
#   y0: numeric vector whose mean defines the total sum of squares.
#   y1: numeric vector compared element-wise against y0.
#
# Returns:
#   1 - SS_res / SS_tot, where SS_tot = sum((y0 - mean(y0))^2) and
#   SS_res = sum((y0 - y1)^2).
rq <- function(y0, y1) {
  total_ss <- sum((y0 - mean(y0))^2)
  residual_ss <- sum((y0 - y1)^2)
  1 - (residual_ss / total_ss)
}
# R-squared of the predictions on the held-out test rows.
rq(y0 = split$test$price, y1 = predictedPrice)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment