Skip to content

Instantly share code, notes, and snippets.

@nqbao
Last active June 25, 2016 15:44
Show Gist options
  • Save nqbao/f3f531751bd7f01d991401b58a7f7a3f to your computer and use it in GitHub Desktop.
Save nqbao/f3f531751bd7f01d991401b58a7f7a3f to your computer and use it in GitHub Desktop.
# House price prediction with linear regression.
# We fit a linear regression model to predict the price of a house,
# then evaluate it on a held-out test set using RMSE and R-squared.
library(arimo)
# Look up the 'housing' dataset through arimo.
# NOTE(review): presumably this returns a handle to a remote/distributed
# data frame rather than the rows themselves -- confirm against arimo docs.
housing_ddf <- arimo.getDDF('housing')

# Request as many rows as the dataset has, so `housing` holds the complete
# dataset. The original line referenced an undefined `ddf`; the handle
# created above is `housing_ddf`.
housing <- head(housing_ddf, nrow(housing_ddf))
#' Randomly split a data frame into training and test sets
#'
#' @param dataframe Data frame whose rows are partitioned.
#' @param ratio Fraction of rows (between 0 and 1) assigned to the training
#'   set; `floor(ratio * nrow(dataframe))` rows are sampled.
#' @param seed Optional value passed to `set.seed()` before sampling so the
#'   split is reproducible. With `NULL` (the default) the seed is not set.
#' @return A list with elements `train` (the sampled rows, in sampled order)
#'   and `test` (all remaining rows, in their original order).
splitdf <- function(dataframe, ratio = 0.8, seed = NULL) {
  stopifnot(
    is.numeric(ratio), length(ratio) == 1L, !is.na(ratio),
    ratio >= 0, ratio <= 1
  )
  if (!is.null(seed)) {
    set.seed(seed)
  }

  n_rows <- nrow(dataframe)
  train_rows <- sample.int(n_rows, size = floor(ratio * n_rows))

  # Select the complement with positive indices: `dataframe[-integer(0), ]`
  # selects zero rows, so negative indexing would empty the test set
  # whenever no rows were sampled for training.
  test_rows <- setdiff(seq_len(n_rows), train_rows)

  # drop = FALSE keeps single-column inputs as data frames instead of
  # collapsing them to bare vectors.
  list(
    train = dataframe[train_rows, , drop = FALSE],
    test = dataframe[test_rows, , drop = FALSE]
  )
}
#' Root mean squared error between two numeric vectors
#'
#' @param y0 Numeric vector of observed values.
#' @param y1 Numeric vector of predicted values.
#' @return The square root of the mean of the squared differences
#'   `y0 - y1`.
rmse <- function(y0, y1) {
  squared_error <- (y0 - y1)^2
  sqrt(mean(squared_error))
}
# Partition the housing data into train/test sets using splitdf()'s
# default ratio.
split <- splitdf(housing)

# Regress price on lot size, bedrooms, bathrooms and stories, using only
# the training rows.
fit <- lm(
  formula = price ~ lotsize + bedrooms + bathrms + stories,
  data = split$train
)

summary(fit)
fitted(fit)     # predicted values for the training rows
residuals(fit)  # residuals (actual - predicted)

# Score the held-out rows and report the prediction error.
predictedPrice <- predict(fit, newdata = split$test)
rmse(split$test$price, predictedPrice)
#' R-squared (coefficient of determination)
#'
#' @param y0 Numeric vector of observed values.
#' @param y1 Numeric vector of predicted values.
#' @return One minus the ratio of the residual sum of squares to the total
#'   sum of squares of `y0` around its mean.
rq <- function(y0, y1) {
  residual_ss <- sum((y0 - y1)^2)
  centered <- y0 - mean(y0)
  total_ss <- sum(centered^2)
  1 - (residual_ss / total_ss)
}
# R-squared of the predictions against the actual test-set prices.
rq(y0 = split$test$price, y1 = predictedPrice)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment