Skip to content

Instantly share code, notes, and snippets.

@priyankajayaswal1
Created June 2, 2015 11:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save priyankajayaswal1/af7bfecd11076b409817 to your computer and use it in GitHub Desktop.
Save priyankajayaswal1/af7bfecd11076b409817 to your computer and use it in GitHub Desktop.
library(sqldf)
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE)
x$Date<- as.Date(x$Date,"%d/%m/%Y")
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S")
x$Global_active_power = as.numeric(x$Global_active_power)
x$Global_reactive_power = as.numeric(x$Global_reactive_power)
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power)
# Creating sample pools for six month data
first<- as.Date('16-12-2008',format = '%d-%m-%Y')
second <- as.Date('30-06-2009',format = '%d-%m-%Y')
y = fn$sqldf("select * from x where Date>=$first and Date<=$second")
# Creating sample pools of data at time 0, 15min, 30min, 45min
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'")
n = as.numeric(sqldf("select count(distinct Time) from z"))
# Checking for which central tendency is apt
t = sqldf("select distinct Time from z")
t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S")
tn = as.numeric(sqldf("select count(*) from t"))
Row_no = KVA = seq(1,tn)
t= cbind(t,Row_no,KVA)
rep_data = data.frame()
for (j in 1:n)
{
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)")
for (i in 1:7)
{
f = fn$sqldf("select * from e where Date%7=$i-1")
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.")
lines(f$Date,f$KVA)
# Finding the best optimized central tendency for the dataset
#(forparticular weekday and particular time)
nn = as.numeric(sqldf("select count(*) from f"))
hippo =d = NULL
k=0
for (i1 in seq(1,nn))
{
d=NULL
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1"))
for (j1 in 1:nn)
{
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1"))
d[j1]=(d2-d1)*.16
}
hippo[i1]=sum(d)
k = k+1
f$hippo[k]=hippo[i1]
print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1]))
}
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1")
repkva = as.numeric((sqldf("select KVA from rep_data")))
reptime = as.character(sqldf("select Time from rep_data"))
r = as.numeric(fn$sqldf("select Row_no from t where Time=(select Time from rep_data)"))
t$KVA[r]=repkva
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment