Skip to content

Instantly share code, notes, and snippets.

@tehp
Last active April 7, 2020 06:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tehp/0e79031b8942a16cc5a499df9a00f40a to your computer and use it in GitHub Desktop.
Save tehp/0e79031b8942a16cc5a499df9a00f40a to your computer and use it in GitHub Desktop.
library("ggplot2")
library("depmixS4")
library("lubridate")
library("chron")
library("dplyr")
library("corrplot")
getwd()
df <- read.table("TrainData.txt", header = T, sep = ",")
df$Date <- as.Date(df$Date, format="%d/%m/%Y")
df$Time <- format(as.POSIXlt(strptime(df$Time, "%H:%M:%S"), format="%H:%M:%S"), "%H:%M:%S")
#Data exploration
correlationMatrix <- cor(df[3:9], method="pearson", use = "complete.obs")
corrplot(correlationMatrix, type = "upper")
#Determine a single observation time window during a weekday and a weekend day
#that shows a clearly recognizable electricity consumption pattern over a time period of several hours
weekday_window_data <- df[wday(df$Date) == 2, ]
weekend_window_data <- df[wday(df$Date) == 7, ]
### monday and saturday subsets for 7 to 9 am ###
monday <- subset(df, wday(df$Date) == 2)
saturday <- subset(df, wday(df$Date) == 7)
monday <- monday[complete.cases(monday),]
saturday <- saturday[complete.cases(saturday),]
mondayW <- subset(monday, (monday$Time >= '07:00:00' & monday$Time <= "09:00:00"))
saturdayW <- subset(saturday, (saturday$Time >= '07:00:00' & saturday$Time <= "09:00:00"))
get_ntimes <- function(training) {
counts <- table(training$Date)
as.vector(counts)
}
training_weekday <- mondayW[mondayW$Date <= "2008-12-31",]
testing_weekday <- setdiff(mondayW, training_weekday)
training_weekend <- saturdayW[saturdayW$Date <= "2008-12-31",]
testing_weekend <- setdiff(saturdayW, training_weekend)
weekday_ntimes = get_ntimes(training_weekday)
weekend_ntimes = get_ntimes(training_weekend)
# Rolling Average
# Moving window week
mw = df[df$Date >= "2009-11-01" & df$Date <= "2009-11-07",]
# MW days
mw_sat <- subset(mw, wday(mw$Date) == 2)
mw_mon <- subset(mw, wday(mw$Date) == 7)
# MW time
mw_sat_window <- mw_sat[mw_sat$Time >= '07:00:00' & mw_sat$Time <= "09:00:00",]
mw_mon_window <- mw_mon[mw_mon$Time >= '07:00:00' & mw_mon$Time <= "09:00:00",]
# Plot the unsmoothed data (gray)
x <- (strptime(mw_sat_window$Time, '%H:%M:%S'))
y <- mw_sat_window$Global_active_power
plot(x, y, type="l", col=grey(.5), main="Global_active_power MA from 7am-9am, 2009-11-02",sub = "(2 sided MA, 10 levels)", xlab="Time", ylab="Global_active_power",)
f20 <- rep(1/10,10)
f20
y_sym <- stats::filter(y, f20, sides=2)
lines(x, y_sym, col="blue")
legend("topleft", inset=.01,
c("Moving Average","Original Data"), fill=c("blue","grey"), horiz=FALSE)
legend("topright", inset=.01,
c("Minor Anomaly","Major Anomaly", "Extreme Anomaly"), fill=c("yellow","orange","red"), horiz=FALSE)
stddev <- sd(y_sym, na.rm = TRUE)
threshold_minor <- 1*stddev
threshold_major <- 2*stddev
threshold_extreme <- 4*stddev
minor_occurances <- 0
major_occurances <- 0
extreme_occurances <- 0
for(i in 1:length(y_sym)) {
if (is.na(y_sym[i])) {
# print("na")
} else {
difference <- abs(y[i] - y_sym[i])
if (difference >= threshold_minor) {
if (difference >= threshold_major) {
if (difference >= threshold_extreme) {
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "red", pch = 19)
extreme_occurances = extreme_occurances + 1;
next
}
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "orange", pch = 19)
major_occurances = major_occurances + 1;
next
}
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "yellow", pch = 19)
minor_occurances = minor_occurances + 1;
next
}
next
}
}
print(minor_occurances)
print(major_occurances)
print(extreme_occurances)
# =======
# Plot the unsmoothed data (gray)
x <- (strptime(mw_mon_window$Time, '%H:%M:%S'))
y <- mw_mon_window$Global_active_power
plot(x, y, type="l", col=grey(.5), main="Global_active_power MA from 7am-9am, 2009-11-07",sub = "(2 sided MA, 10 levels)", xlab="Time", ylab="Global_active_power",)
f20 <- rep(1/10,10)
f20
y_sym <- stats::filter(y, f20, sides=2)
lines(x, y_sym, col="blue")
legend("topleft", inset=.01,
c("Moving Average","Original Data"), fill=c("blue","grey"), horiz=FALSE)
legend("topright", inset=.01,
c("Minor Anomaly","Major Anomaly", "Extreme Anomaly"), fill=c("yellow","orange","red"), horiz=FALSE)
stddev <- sd(y_sym, na.rm = TRUE)
threshold_minor <- 1*stddev
threshold_major <- 2*stddev
threshold_extreme <- 4*stddev
minor_occurances <- 0
major_occurances <- 0
extreme_occurances <- 0
for(i in 1:length(y_sym)) {
if (is.na(y_sym[i])) {
# print("na")
} else {
difference <- abs(y[i] - y_sym[i])
if (difference >= threshold_minor) {
if (difference >= threshold_major) {
if (difference >= threshold_extreme) {
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "red", pch = 19)
extreme_occurances = extreme_occurances + 1;
next
}
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "orange", pch = 19)
major_occurances = major_occurances + 1;
next
}
points(strptime(mw_sat_window[i, "Time"], '%H:%M:%S'), y[i], col = "yellow", pch = 19)
minor_occurances = minor_occurances + 1;
next
}
next
}
}
print(minor_occurances)
print(major_occurances)
print(extreme_occurances)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment