Skip to content

Instantly share code, notes, and snippets.

@selva86
Created November 26, 2015 07:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save selva86/f01b6ffd13ec0a93406d to your computer and use it in GitHub Desktop.
Save selva86/f01b6ffd13ec0a93406d to your computer and use it in GitHub Desktop.
remove_heteroscedasticity_example.R
.libPaths()
url <- "http://rstatistics.net/wp-content/uploads/2015/09/ozone.csv"
inputData <- read.csv(url)
# Replace outliers as missing values.
replace_outlier_with_missing <- function(x, na.rm = TRUE, ...) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) # get %iles
H <- 1.5 * IQR(x, na.rm = na.rm) # outlier limit threshold
y <- x
y[x < (qnt[1] - H)] <- NA # replace values below lower bounds
y[x > (qnt[2] + H)] <- NA # replace values above higher bound
return(y) # returns treated variable
}
inputData_cont <- inputData[, c("pressure_height", "Wind_speed", "Humidity", "Temperature_Sandburg", "Temperature_ElMonte", "Inversion_base_height", "Pressure_gradient", "Inversion_temperature", "Visibility")]
inputData_cont <- as.data.frame(sapply(inputData_cont, replace_outlier_with_missing))
cont_vars <- c("pressure_height", "Wind_speed", "Humidity", "Temperature_Sandburg", "Temperature_ElMonte", "Inversion_base_height", "Pressure_gradient", "Inversion_temperature", "Visibility")
inputData <- cbind(inputData[!names(inputData) %in% cont_vars], inputData_cont)
# Missing value imputation with DMwR
library(DMwR)
library(lmtest)
library(caret)
par(mfrow=c(2,2))
input <- inputData
inputData <- knnImputation(input)
mod <- lm(ozone_reading ~ . , data=inputData)
mod <- lm(ozone_reading1 ~ . - Inversion_temperature, data=inputData)
summary(mod)
car::vif(mod)
plot(mod)
bptest(mod)
# Box Cox
bc1 <- BoxCoxTrans(inputData$ozone_reading)
bc2 <- BoxCoxTrans(inputData$Month)
bc3 <- BoxCoxTrans(inputData$pressure_height)
bc4 <- BoxCoxTrans(inputData$Humidity)
bc5 <- BoxCoxTrans(inputData$Temperature_ElMonte)
bc6 <- BoxCoxTrans(inputData$Inversion_base_height)
bc7 <- BoxCoxTrans(inputData$Inversion_temperature)
inputData$ozone_reading1 <- predict(bc1, inputData$ozone_reading)
inputData$Month1 <- predict(bc1, inputData$Month)
inputData$pressure_height1 <- predict(bc1, inputData$pressure_height)
inputData$Humidity1 <- predict(bc1, inputData$Humidity)
inputData$Temperature_ElMonte1 <- predict(bc1, inputData$Temperature_ElMonte)
inputData$Inversion_base_height1 <- predict(bc1, inputData$Inversion_base_height)
inputData$Inversion_temperature1 <- predict(bc1, inputData$Inversion_temperature)
mod <- lm(ozone_reading1 ~ Month1 + pressure_height1 + Humidity1 + Temperature_ElMonte1 + Inversion_base_height1 + Inversion_temperature1, data=inputData)
mod <- lm(ozone_reading1 ~ . -ozone_reading - Month - Day_of_week - - Wind_speed - pressure_height - Humidity - Temperature_ElMonte - Inversion_temperature - Inversion_base_height, data=inputData)
mod <- lm(ozone_reading1 ~ Month1 + Temperature_ElMonte + Inversion_temperature1 * Inversion_base_height1, data=inputData)
mod <- lm(ozone_reading ~ Month1 + Temperature_ElMonte * Inversion_base_height1, data=inputData)
summary(mod)
car::vif(mod)
plot(mod)
bptest(mod)
.libPaths(c("/Library/Frameworks/R.framework/Versions/3.2/Resources/library", .libPaths()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment