Created
May 14, 2020 18:16
-
-
Save jkapila/fe28656ed4c0fb9643375ba60af660d1 to your computer and use it in GitHub Desktop.
Testing DAR on Boston Housing Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Distribution Assertive Regression (DAR) demo on the Boston Housing data

# Packages: mlbench provides the BostonHousing data set loaded below.
library(dplyr)
library(mlbench)

data("BostonHousing")

# Reorder the rows by ascending value of the response column `medv`.
# NOTE(review): presumably dafr() relies on this ordering when it forms its
# splits -- confirm against the dafr() implementation.
df <- BostonHousing[order(BostonHousing$medv), ]
# Fit DAR with dafr()'s default base model, using 3 front and 3 back deciles.
# NOTE(review): dafr() is not defined in this file and must be sourced first;
# the original comment describes the default base model as lm -- confirm.
mod_dfar <- dafr(
  medv ~ .,
  data = df,
  dec.front = 3,
  dec.back = 3
)

# Inspect the fitted object: overall summary first, then its components.
summary(mod_dfar)
mod_dfar$call
mod_dfar$models
mod_dfar$results
mod_dfar$mapes
mod_dfar$split.freq
# Refit DAR with the same decile settings, this time passing glm as the base
# model. The assignment overwrites mod_dfar, so every later use of mod_dfar in
# this script refers to this glm-based fit.
mod_dfar <- dafr(
  medv ~ .,
  data = df,
  dec.front = 3,
  dec.back = 3,
  model = glm
)
# Plotting code that reproduces the figures in the blog post
library(ggplot2)
library(ggthemes)
library(scales)

# The Bath Tub Curve: the `mape` column of mod_dfar$mapes plotted against the
# split index, as points joined by a dashed line.
ggplot(mod_dfar$mapes, aes(x = splits, y = mape)) +
  geom_point(size = 3) +
  geom_line(size = 1, linetype = "dashed") +
  scale_x_continuous(breaks = 1:10) +
  theme_light() +
  labs(
    title = "Plot of Unsplitted Absolute Percentage Error",
    x = "Split Index",
    y = "% MAPE Value",
    caption = "Data Boston Housing (mlbench)\n Model: Linear Model with Default Parameters"
  )
# Unsplit regression: `mape` per split, with the outer split ranges shaded.
#
# mean_mape is the mean of the `mape` column rounded to a whole number. It
# drives both the horizontal reference line and the text of the label, so the
# two cannot drift apart when the data or the model changes (the label used to
# be a hard-coded "18%").
mean_mape <- round(mean(mod_dfar$mapes$mape))

mod_dfar$mapes %>%
  ggplot(aes(x = splits, y = mape)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = mean_mape, linetype = "twodash", size = 1.1) +
  # Shade splits 1-3.5 and 7.5-10.
  # NOTE(review): the rectangle bounds are constants inside aes(), so ggplot2
  # is expected to draw one rectangle per data row; the visible opacity is the
  # stacked result of alpha = 0.03. Kept as-is to preserve the appearance.
  geom_rect(aes(xmin = 1, xmax = 3.5, ymin = -Inf, ymax = Inf),
            fill = "red", alpha = 0.03) +
  geom_rect(aes(xmin = 7.5, xmax = 10, ymin = -Inf, ymax = Inf),
            fill = "red", alpha = 0.03) +
  # Label sits 2 units above the reference line (y = 20 when the mean is 18,
  # matching the original placement).
  annotate(geom = "label", x = 5.5, y = mean_mape + 2,
           label = paste0("Avg MAPE : ", mean_mape, "%"),
           color = "black", size = 4) +
  scale_x_continuous(breaks = 1:10) +
  theme_bw() +
  labs(title = "Plot of Unsplitted Absolute Percentage Error",
       subtitle = "Shaded regions indicate MAPE with High Values",
       x = "Split Index",
       y = "% MAPE Value",
       caption = "Data Boston Housing (mlbench)\n Model: Linear Model with Default Parameters")
# Split (DAR) regression: `mape_dec` per split, with the front and back decile
# ranges shaded.
#
# mean_mape_dafr is the mean of the `mape_dec` column rounded to a whole
# number. It drives both the horizontal reference line and the text of the
# label, so the two stay consistent when the data or the model changes (the
# label used to be a hard-coded "11%").
mean_mape_dafr <- round(mean(mod_dfar$mapes$mape_dec))

mod_dfar$mapes %>%
  ggplot(aes(x = splits, y = mape_dec)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = mean_mape_dafr, linetype = "twodash", size = 1.05) +
  # Shade splits 1-3.5 and 7.5-10.
  # NOTE(review): the rectangle bounds are constants inside aes(), so ggplot2
  # is expected to draw one rectangle per data row; the visible opacity is the
  # stacked result of alpha = 0.03. Kept as-is to preserve the appearance.
  geom_rect(aes(xmin = 1, xmax = 3.5, ymin = -Inf, ymax = Inf),
            fill = "purple", alpha = 0.03) +
  geom_rect(aes(xmin = 7.5, xmax = 10, ymin = -Inf, ymax = Inf),
            fill = "purple", alpha = 0.03) +
  # Label sits 1.5 units above the reference line (y = 12.5 when the mean is
  # 11, matching the original placement).
  annotate(geom = "label", x = 5.5, y = mean_mape_dafr + 1.5,
           label = paste0("Avg MAPE : ", mean_mape_dafr, "%"),
           color = "black", size = 4) +
  scale_x_continuous(breaks = 1:10) +
  theme_bw() +
  labs(title = "Plot of Splitted Absolute Percentage Error with Distributed Assertive Regression",
       subtitle = "Shaded regions indicate MAPE for Front Decile: 3 & Back Decile: 3",
       x = "Split Index",
       y = "% MAPE Value",
       caption = "Data Boston Housing (mlbench)\nModel: Linear Model with Default Parameters")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment