remove_heteroscedasticity_example.R
.libPaths() | |
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv" | |
inputData <- read.csv(url) | |
# Replace outliers as missing values. | |
replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) { | |
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles | |
H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold | |
y <- x | |
y[x < (qnt[1] - H)] <- NA # replace values below lower bounds | |
y[x > (qnt[2] + H)] <- NA # replace values above higher bound | |
return(y) # returns treated variable | |
} | |
inputData_cont <- inputData[, c("pressure_height", "Wind_speed", "Humidity", "Temperature_Sandburg", "Temperature_ElMonte", "Inversion_base_height", "Pressure_gradient", "Inversion_temperature", "Visibility")] | |
inputData_cont <- as.data.frame(sapply(inputData_cont, replace_outlier_with_missing)) | |
cont_vars <- c("pressure_height", "Wind_speed", "Humidity", "Temperature_Sandburg", "Temperature_ElMonte", "Inversion_base_height", "Pressure_gradient", "Inversion_temperature", "Visibility") | |
inputData <- cbind(inputData[!names(inputData) %in% cont_vars], inputData_cont) | |
# Missing value imputation with DMwR | |
library(DMwR) | |
library(lmtest) | |
library(caret) | |
par(mfrow=c(2,2)) | |
input <- inputData | |
inputData <- knnImputation(input) | |
mod <- lm(ozone_reading ~ . , data=inputData) | |
mod <- lm(ozone_reading1 ~ . - Inversion_temperature, data=inputData) | |
summary(mod) | |
car::vif(mod) | |
plot(mod) | |
bptest(mod) | |
# Box Cox | |
bc1 <- BoxCoxTrans(inputData$ozone_reading) | |
bc2 <- BoxCoxTrans(inputData$Month) | |
bc3 <- BoxCoxTrans(inputData$pressure_height) | |
bc4 <- BoxCoxTrans(inputData$Humidity) | |
bc5 <- BoxCoxTrans(inputData$Temperature_ElMonte) | |
bc6 <- BoxCoxTrans(inputData$Inversion_base_height) | |
bc7 <- BoxCoxTrans(inputData$Inversion_temperature) | |
inputData$ozone_reading1 <- predict(bc1, inputData$ozone_reading) | |
inputData$Month1 <- predict(bc1, inputData$Month) | |
inputData$pressure_height1 <- predict(bc1, inputData$pressure_height) | |
inputData$Humidity1 <- predict(bc1, inputData$Humidity) | |
inputData$Temperature_ElMonte1 <- predict(bc1, inputData$Temperature_ElMonte) | |
inputData$Inversion_base_height1 <- predict(bc1, inputData$Inversion_base_height) | |
inputData$Inversion_temperature1 <- predict(bc1, inputData$Inversion_temperature) | |
mod <- lm(ozone_reading1 ~ Month1 + pressure_height1 + Humidity1 + Temperature_ElMonte1 + Inversion_base_height1 + Inversion_temperature1, data=inputData) | |
mod <- lm(ozone_reading1 ~ . -ozone_reading - Month - Day_of_week - - Wind_speed - pressure_height - Humidity - Temperature_ElMonte - Inversion_temperature - Inversion_base_height, data=inputData) | |
mod <- lm(ozone_reading1 ~ Month1 + Temperature_ElMonte + Inversion_temperature1 * Inversion_base_height1, data=inputData) | |
mod <- lm(ozone_reading ~ Month1 + Temperature_ElMonte * Inversion_base_height1, data=inputData) | |
summary(mod) | |
car::vif(mod) | |
plot(mod) | |
bptest(mod) | |
.libPaths(c("/Library/Frameworks/R.framework/Versions/3.2/Resources/library", .libPaths())) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment