Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# setwd("C:\\Personal\\Blog\\Generating Sample Data In R")
# ---- Simulation settings ----
# Lower bounds for the readings of healthy devices. Every simulated reading is
# a lower bound plus a uniform draw from [0, 1] scaled to a 15-unit range.
minTemperature <- 20
minHumidity <- 60
minPressure <- 50
n1 <- 200 # records from good devices (two devices, 100 records each)
n2 <- 100 # records from the one defective device
n <- n1 + n2 # total records

# Simulate `count` telemetry records for one device state.
#
# Args:
#   count:            number of records to generate (0 gives an empty frame).
#   base_temperature: lower bound of the temperature readings.
#   base_humidity:    lower bound of the humidity readings.
#   base_pressure:    lower bound of the pressure readings.
#   state:            label stored in the `state` column of every record.
#   spread:           width of the uniform range added to each lower bound.
#
# Returns:
#   A data frame with numeric columns temprature, humidity, pressure and a
#   character column state, one row per record.
simulate_device_readings <- function(count, base_temperature, base_humidity,
                                     base_pressure, state, spread = 15) {
  # Filling by row gives each record three consecutive draws (temperature,
  # humidity, pressure) - the same order a record-by-record loop calling
  # runif(1) three times would consume the random stream in.
  draws <- matrix(runif(3 * count), ncol = 3, byrow = TRUE)
  data.frame(
    temprature = base_temperature + draws[, 1] * spread,
    humidity = base_humidity + draws[, 2] * spread,
    pressure = base_pressure + draws[, 3] * spread,
    state = rep(state, count),
    stringsAsFactors = FALSE
  )
}

set.seed(12345)
# Good devices are generated first, then the defective one, so the draws are
# taken from the seeded stream in a fixed order.
good_readings <- simulate_device_readings(
  n1, minTemperature, minHumidity, minPressure, "Good"
)
# The defective device reports from shifted (higher) lower bounds.
bad_readings <- simulate_device_readings(n2, 30, 70, 55, "Bad")

# Rows 1..n1 are "Good", rows (n1 + 1)..n are "Bad".
df.devices.telemtry <- rbind(good_readings, bad_readings)

# Keep the plain character labels next to the factor used for modelling.
df.devices.telemtry$originalState <- df.devices.telemtry$state
df.devices.telemtry$state <- as.factor(df.devices.telemtry$state)
# ---- Exploratory plots ----
library(ggplot2)

# Layers shared by both scatter plots: points coloured by device state, the
# common title and x-axis label, plus the given y-axis label.
#
# Args:
#   y_label: text for the y axis.
#
# Returns:
#   A list of ggplot2 components that can be added to a plot with `+`.
telemetry_layers <- function(y_label) {
  list(
    geom_point(aes(color = state)),
    ggtitle("Telemetry data Good vs Bad"),
    xlab("Device Temperature"),
    ylab(y_label)
  )
}

# A ggplot object is only drawn when it is printed; printing explicitly makes
# the plots appear when the script is source()d, not only when it is run
# line by line at the console.
p <- ggplot(data = df.devices.telemtry, aes(x = temprature, y = pressure))
print(p + telemetry_layers("Device Pressure"))

p <- ggplot(data = df.devices.telemtry, aes(x = temprature, y = humidity))
print(p + telemetry_layers("Device Humidity"))

# str(df.devices.telemtry)
# Share of records in each state across the whole data set.
prop.table(table(df.devices.telemtry$state))
# ---- Train/test split ----
# Install caret only when it is missing, rather than re-installing (and
# hitting the network) every time the script runs.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
library(caret)

set.seed(123456)
# Row indices for the training set: 70% of the records, partitioned on the
# state column; list = FALSE returns them as a matrix usable for row indexing.
idx <- createDataPartition(df.devices.telemtry$state, p = 0.7, list = FALSE)
training <- df.devices.telemtry[idx, ]
test <- df.devices.telemtry[-idx, ]

# Check that the training set keeps the same proportion of each state as the
# full data set.
prop.table(table(training$state))
# ---- Logistic regression ----
# Model the device state from the three sensor readings. state is a factor
# with levels "Bad" and "Good"; under glm's binomial convention the first
# level is treated as failure, so fitted probabilities refer to "Good".
model <- glm(
  formula = state ~ temprature + pressure + humidity,
  family = binomial(),
  data = training
)
summary(model)

# Fitted probabilities for the held-out records.
predicted <- predict(
  model,
  newdata = test[c("temprature", "pressure", "humidity")],
  type = "response"
)

# Actual state against the prediction at a 0.5 cut-off.
table(test$state, predicted > 0.5)

# One row per test record: actual state, thresholded prediction, probability.
df <- data.frame(
  actual = test$state,
  predicted = predicted > 0.5,
  probability = predicted
)
# First five records
head(df, n = 5)
# Last five records
tail(df, n = 5)
summary(predicted)

# Same comparison with a stricter 0.65 cut-off.
table(test$state, predicted > 0.65)

# Rebuild the per-record table for the 0.65 cut-off.
df <- data.frame(
  actual = test$state,
  predicted = predicted > 0.65,
  probability = predicted
)
summary(predicted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment