Skip to content

Instantly share code, notes, and snippets.

@johnDorian
Created September 21, 2010 06:22
Show Gist options
  • Save johnDorian/589287 to your computer and use it in GitHub Desktop.
Save johnDorian/589287 to your computer and use it in GitHub Desktop.
setwd("~/Documents/code/francesca")
library(TSAgg)
quality<-read.csv("Burke_worked.csv",header=T)
head(quality)
q2<-timeSeries(quality$date,"%d/%m/%Y %H:%M")
q2<-data.frame(q2,quality[,3:7])
temp<-q2[q2[,"hour"]==0,]
#excludes midnight autosampler samples
midnight.wq<-temp[temp[,"Sample.Type"]!="A",]
###Sample 100 samples from numbers 1:900 (every hour is 100 values), and each aggregation is 300 values in length.
#subsampled<-sample(900,100,replace=T)
###After 8am and before 9 am
#a<-length(subsampled[subsampled<100])
###After 9am and before 12 pm
#b<-length(subsampled[subsampled<400])-a
###After 12pm and before 3pm
#c<-length(subsampled[subsampled<700])-b-a
###After 3pm and before 5pm
#d<-length(subsampled)-c-b-a
##a;b;c;d
##Read the flow file, skip header lines, and not include quality of flow sample column.
flow<-read.table("2122052.TXT",header=T,sep=",")[,-3]
###Fix the names up
names(flow)<-c("date","discharge")
flow<-flow[!is.na(flow$discharge),]
head(flow)
###Install the timeSeries aggregation package
flow<-timeSeries(flow$date,"%d/%m/%Y %H:%M",data=flow$discharge)
###timesSeries data frame must start at midnight so the merge with the quality data works.
flow<-flow[-c(1:8),]
head(flow)
##The aggregation can take some time for long datasets like this one.
flow.3hr<-hoursAgg(flow,sum,3)
head(flow.3hr)
names(flow.3hr)<-c("date","discharge")
back.up<-flow.3hr
tail(flow.3hr)
###Now use the merge function to join the WQ and the flow data
#day.id<-as.numeric(rownames(flow.3hr[(flow.3hr[,1]==midnight.wq[1,1]),]))
#
#a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))
#b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))
#c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))
#d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))
#sampled.day<-rbind(a.data,b.data,c.data,d.data)
#hist(sampled.day$flow)
#day.sum<-data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day$flow),min=min(sampled.day$flow),max=max(sampled.day$flow),median=median(sampled.day$flow),sd=sd(sampled.day$flow),var=var(sampled.day$flow))
###Create a list to save the sampled data into
sampled.day<-list()
###Create an object to put the
day.sum<-NULL
###How many samples for each day?
samples<-100
###Find out the days without flow data
without.flow<-NULL
for(i in 1:length(midnight.wq[,1])){
###Create a subsample dataset
subsampled<-sample(900,samples,replace=T)
##After 8am and before 9 am
a<-length(subsampled[subsampled<100])
##After 9am and before 12 pm
b<-length(subsampled[subsampled<400])-a
##After 12pm and before 3pm
c<-length(subsampled[subsampled<700])-b-a
##After 3pm and before 5pm
d<-length(subsampled)-c-b-a
###Make sure there is flow data for this day.
if(length( flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),1])==1){
####Get the row number of the water quality day in question from the larger flow dataset.
day.id<-as.numeric(rownames(flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),]))
###Create a dataset of the day based on the random samples of each time period.
a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))
b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))
c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))
d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))
sampled.day[[i]]<-rbind(a.data,b.data,c.data,d.data)
###Add this day summary to the complete output of all the days in question
day.sum<-rbind(day.sum,data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day[[i]]$flow),min=min(sampled.day[[i]]$flow),max=max(sampled.day[[i]]$flow),median=median(sampled.day[[i]]$flow),sd=sd(sampled.day[[i]]$flow),var=var(sampled.day[[i]]$flow)))
} else {
without.flow<-rbind(without.flow,paste(as.Date(midnight.wq[i,1])))
}
}
write.csv(day.sum,"Midnight_flow_Burk.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment