Skip to content

Instantly share code, notes, and snippets.

@jkapila
Created May 14, 2020 18:16
Show Gist options
  • Save jkapila/fe28656ed4c0fb9643375ba60af660d1 to your computer and use it in GitHub Desktop.
Save jkapila/fe28656ed4c0fb9643375ba60af660d1 to your computer and use it in GitHub Desktop.
Testing DAR on Boston Housing Data
## Testing Distribution Assertive Regression with Boston Housing Data
library(dplyr)
library(mlbench)
# Load the Boston Housing data (from mlbench) and sort its rows by ascending
# median home value (`medv`), the response used in the fits below.
data("BostonHousing")
df <- BostonHousing[order(BostonHousing$medv), ]
# Fit DAR with dafr()'s default base model (lm, per the original author's
# note), passing 3 as both the front and the back decile setting.
mod_dfar <- dafr(
  medv ~ .,
  data = df,
  dec.front = 3,
  dec.back = 3
)

# Inspect the fit: the overall summary first, then its individual components.
summary(mod_dfar)
mod_dfar$call
mod_dfar$models
mod_dfar$results
mod_dfar$mapes
mod_dfar$split.freq
# Refit DAR with glm as the base model. This overwrites the earlier fit held
# in mod_dfar, so the plots below are drawn from this glm-based fit.
mod_dfar <- dafr(
  medv ~ .,
  data = df,
  dec.front = 3,
  dec.back = 3,
  model = glm
)
# Plotting code that reproduces the figures in the blog post
library(ggplot2)
library(ggthemes)
library(scales)
# The bathtub curve: the `mape` column plotted against the split index, with
# the points joined by a dashed line.
ggplot(mod_dfar$mapes, aes(x = splits, y = mape)) +
  geom_point(size = 3) +
  geom_line(size = 1, linetype = "dashed") +
  scale_x_continuous(breaks = seq_len(10)) +
  theme_light() +
  labs(
    title = "Plot of Unsplitted Absolute Percentage Error",
    x = "Split Index",
    y = "% MAPE Value",
    caption = "Data Boston Housing (mlbench)\n Model: Linear Model with Default Parameters"
  )
# Unsplit regression: the `mape` column per split, with both ends of the split
# range shaded.
# The average MAPE is rounded to a whole percent. It positions the horizontal
# reference line and is also the value shown in the label, so the label can no
# longer go stale when the model or data changes.
mean_mape <- round(mean(mod_dfar$mapes$mape))
mod_dfar$mapes %>%
  ggplot(aes(x = splits, y = mape)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = mean_mape, linetype = "twodash", size = 1.1) +
  # Constants inside aes() are recycled over every row of the plot data, so one
  # rectangle is drawn per row; the very low alpha accumulates into the shading.
  geom_rect(aes(xmin = 1, xmax = 3.5, ymin = -Inf, ymax = Inf),
            fill = "red", alpha = 0.03) +
  geom_rect(aes(xmin = 7.5, xmax = 10, ymin = -Inf, ymax = Inf),
            fill = "red", alpha = 0.03) +
  # Place the label just above the reference line (2 units, matching the
  # original layout of y = 20 for an 18% average).
  annotate(geom = "label", x = 5.5, y = mean_mape + 2,
           label = paste0("Avg MAPE : ", mean_mape, "%"),
           color = "black", size = 4) +
  scale_x_continuous(breaks = 1:10) +
  theme_bw() +
  labs(title = "Plot of Unsplitted Absolute Percentage Error",
       subtitle = "Shaded regions indicate MAPE with High Values",
       x = "Split Index",
       y = "% MAPE Value",
       caption = "Data Boston Housing (mlbench)\n Model: Linear Model with Default Parameters")
# Split (DAR) regression: the `mape_dec` column per split, with both ends of
# the split range shaded.
# The average is rounded to a whole percent. It positions the horizontal
# reference line and is also the value shown in the label, so the label always
# matches the fitted model instead of a hard-coded figure.
mean_mape_dafr <- round(mean(mod_dfar$mapes$mape_dec))
mod_dfar$mapes %>%
  ggplot(aes(x = splits, y = mape_dec)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = mean_mape_dafr, linetype = "twodash", size = 1.05) +
  # Constants inside aes() are recycled over every row of the plot data, so one
  # rectangle is drawn per row; the very low alpha accumulates into the shading.
  geom_rect(aes(xmin = 1, xmax = 3.5, ymin = -Inf, ymax = Inf),
            fill = "purple", alpha = 0.03) +
  geom_rect(aes(xmin = 7.5, xmax = 10, ymin = -Inf, ymax = Inf),
            fill = "purple", alpha = 0.03) +
  # Place the label just above the reference line (1.5 units, matching the
  # original layout of y = 12.5 for an 11% average).
  annotate(geom = "label", x = 5.5, y = mean_mape_dafr + 1.5,
           label = paste0("Avg MAPE : ", mean_mape_dafr, "%"),
           color = "black", size = 4) +
  scale_x_continuous(breaks = 1:10) +
  theme_bw() +
  labs(title = "Plot of Splitted Absolute Percentage Error with Distributed Assertive Regression",
       subtitle = "Shaded regions indicate MAPE for Front Decile: 3 & Back Decile: 3",
       x = "Split Index",
       y = "% MAPE Value",
       caption = "Data Boston Housing (mlbench)\nModel: Linear Model with Default Parameters")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment