Created
August 19, 2013 23:12
-
-
Save ajaypillarisetti/6275303 to your computer and use it in GitHub Desktop.
A routine to clean single iButton files. Focuses mainly on iButtons used as Stove Use Monitors (SUMS), but may be otherwise useful. Tested with 1921G, 1922T, and 1922L. 1922E testing ongoing. Does not properly process Hygrochron iButtons at this time. Work in progress!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(plyr) | |
require(lubridate) | |
############################################################# | |
# this is a combined routine to | |
# 1. resolve different date-time formats between files | |
# 2. convert F to C | |
# 3. export as RDS, CSV, or both (TSDB forthcoming) | |
# 4. create and update an error log | |
# It can toss all values in 1 file after a negative temp; | |
# that option is flaggable in the function. | |
# It also assesses file length, and moves short files | |
# to a separate "short" directory | |
############################################################# | |
#TO DO add TSDB out | |
#TO DO use first sample timestamp from SUMS header to determine dt format for short files | |
#default values for subroutines are specified in the initial function parameters | |
#can easily be changed; for example: | |
#sums.cleaner(file, tossneg=TRUE, tzone="Asia/Shanghai", savepath="~/Desktop/Study/Folder",output="csv")
# Clean a single iButton / SUMS export file and save the result under `savepath`.
#
# Arguments:
#   file     path of the raw export (a header block followed by the data rows)
#   tossneg  if TRUE, drop the first negative temperature and every row after it
#   tzone    time zone passed to lubridate when the timestamps are parsed
#   savepath output directory; created when it does not exist
#   output   'csv', 'rds', or both (the default)
#
# Side effects:
#   * console output is diverted (sink) to <savepath>/errorlog.<YYYY_MM_DD_HH>.txt
#     for the duration of the call; the sink is released even when an error occurs
#   * files judged "short" are written to <savepath>/short/
#   * all other files are written to <savepath> as rds and/or csv
#
# Returns NULL, invisibly.
sums.cleaner <- function(file, tossneg=FALSE, tzone="Asia/Kolkata", savepath=getwd(),output=c('csv','rds')){
  #create the output directory when it does not exist yet
  if(!file.exists(savepath)){dir.create(savepath,showWarnings=FALSE)}

  #one error log per hour of run time, e.g. errorlog.2013_08_19_23.txt
  currentdt <- format(Sys.time(),'%Y_%m_%d_%H')
  logloc <- file.path(savepath,paste0('errorlog.',currentdt,'.txt'))

  #divert output to the log; on.exit guarantees the sink is closed on any exit path
  sink(logloc,append=TRUE)
  on.exit(sink(),add=TRUE)

  #write one alert line to the log; format() makes the timestamp human readable
  #(cat() on a bare Sys.time() prints the number of seconds since the epoch)
  alert <- function(msg){cat(format(Sys.time()), " ALERT: ", file, msg)}

  #get device type from SUMS header; clean/format
  #NOTE(review): the header parsing below relies on the exact layout of the
  #iButton export; the expressions are intentionally left as they were
  notes <- head(read.csv(file, header=FALSE),21)[,1]
  notes <- as.character(notes[1:21])
  device <- substring(strsplit(notes[1],":")[[1]][2],2,9)
  if(device=="DS1921G-"){device <- "DS1921G"}
  if(device=="DS1922/D"){device <- "DS1922X"}

  #output file names: input name minus its 4-character extension, plus device type
  #(built here because the short-file branch further down needs them)
  filename <- paste(substr(file,1,nchar(file)-4),device,sep="_")
  newfilerds <- paste(filename,'rds',sep='.')
  newfilecsv <- paste(filename,'csv',sep='.')

  #find line at which to begin import
  startimport <- grep("Value",notes)

  #import SUMs file
  sums <- read.csv(file,skip=startimport, header=FALSE, stringsAsFactors=FALSE)
  sums$device <- device
  sums$sums_serial_no <- substring(notes[2],nchar(notes[2])-15,nchar(notes[2]))
  names(sums)[1:5] <- c('datetime','unit','temp','sumstype','serial')

  #correct for fahrenheit files
  #presumably read.csv turns a unit column holding only "F" into logical FALSE,
  #which is why the test is against "FALSE" -- TODO confirm against a raw file
  tempunit <- unique(as.character(sums$unit))
  if(identical(tempunit,"FALSE")){
    sums$temp <- round((5/9)*(sums$temp - 32),3)
    #NOTE(review): temp now holds Celsius values while unit is set to "F"
    sums$unit <- "F"
    alert(" is in Fahrenheit \n")
  }

  #toss the first negative value and everything after it (optional, defaults to FALSE)
  firstneg <- which(sums$temp<0)
  if(tossneg && length(firstneg)>0){
    #seq_len keeps zero rows when the very first sample is negative
    #(1:0 would have kept row 1, i.e. the negative sample itself)
    sums <- sums[seq_len(min(firstneg)-1),]
    alert(" contains negative values. \n")
    if(nrow(sums)==0){
      alert(" has no samples before the first negative value; nothing saved. \n")
      return(invisible(NULL))
    }
  }

  #extract the date part of the first timestamp of actual data
  fulldate <- strsplit(sums[1,1]," ")[[1]][1]
  #determine the date delimiter
  if(grepl('/',fulldate)){delimiter <- "/"}
  if(grepl('-',fulldate)){delimiter <- '-'}

  #separate date into three components; keep the unique values of each position
  dateparts <- strsplit(sums$datetime,delimiter)
  firsts <- as.numeric(unique(sapply(dateparts,'[[',1)))
  seconds <- as.numeric(unique(sapply(dateparts,'[[',2)))
  thirds <- unique(sapply(dateparts,'[[',3))
  #the third component still carries the time of day; strip it
  thirds <- unique(as.numeric(sapply(strsplit(thirds, " "),'[[',1)))
  #for the rare 2012 or 2013 in a SUMs file, reduce to two digit year
  if(all(nchar(thirds)==4)){thirds <- as.numeric(substring(thirds,3,4))}

  #figure out sample duration from the header
  numsamples <- as.numeric(strsplit(notes[grep("Mission Samples", notes)],":")[[1]][2])
  sampleinterval <- strsplit(notes[grep("Sample Rate", notes)],":")[[1]][2]
  #some of the sums record 10 minute intervals; others say 600 seconds
  #catch the minutes and convert to seconds
  samplescale <- grepl('minute',sampleinterval)
  sampleinterval <- as.numeric(strsplit(sampleinterval," ")[[1]][4])
  if(samplescale){sampleinterval <- sampleinterval*60}
  sampletimesec <- numsamples*sampleinterval
  sampletime <- sampletimesec/(60*60*24)

  #header says less than 2 days: keep an rds copy of the data in "short"
  if(sampletime<2){
    alert(" is less than 2 days long. \n")
    dir.create(file.path(savepath,'short'),showWarnings=FALSE)
    saveRDS(sums,file=file.path(savepath,'short',newfilerds))
  }

  #extract startdate from SUMS header - this is recorded by the device
  startdate <- strsplit(notes[grep("Mission Start", notes)],": ")[[1]][2]
  reform <- strsplit(startdate," ")
  startdate <- ymd_hms(paste(reform[[1]][6],reform[[1]][2],reform[[1]][3],reform[[1]][4]))
  enddate <- startdate + sampletimesec
  #TRUE when the whole mission starts and ends in the same calendar month
  monthmatch <- month(startdate) == month(enddate)

  #month and two digit year of the first sample, as logged in the SUMS header
  startmonth <- month(startdate)
  startyear <- as.numeric(substr(year(startdate),3,4))

  #work out the field order of the timestamps; stays NULL when nothing matches
  dtformat <- NULL
  if(
    # 1. multiday sample within a month
    #as many unique firsts as thirds, but not as seconds, and thirds is the year
    monthmatch &&
    length(unique(firsts)) == length(unique(thirds)) &&
    length(unique(seconds)) != length(unique(firsts)) &&
    all(thirds==startyear)){
    dtformat <- 'mdy'
  }else if(
    monthmatch &&
    length(unique(firsts)) == length(unique(seconds)) &&
    length(unique(firsts)) != length(unique(thirds)) &&
    all(firsts==startyear)){
    dtformat <- 'ymd'
  }else if(
    monthmatch &&
    length(unique(seconds)) == length(unique(thirds)) &&
    length(unique(firsts)) != length(unique(thirds)) &&
    all(thirds==startyear)){
    dtformat <- 'dmy'
  }else if(
    # 2. multiday sample across a new year
    #more than one month, second field holds days (>12),
    #first field only dec or jan, both years present in the third field
    !monthmatch &&
    any(seconds>12) &&
    all(firsts %in% c('12','1')) &&
    all(c(startyear,startyear+1) %in% thirds)){
    dtformat <- 'mdy'
  }else if(
    # 3. multiday sample throughout the year
    #longer than a month, ymd
    !monthmatch &&
    all(firsts %in% startyear) &&
    all(seconds %in% startmonth:month(enddate)) &&
    any(thirds>12 | length(thirds)>length(seconds))){
    dtformat <- 'ymd'
  }else if(
    #longer than a month, dmy
    !monthmatch &&
    all(thirds %in% startyear) &&
    all(seconds %in% startmonth:month(enddate)) &&
    any(firsts>12 | length(firsts)>length(seconds))){
    dtformat <- 'dmy'
  }else if(
    #longer than a month, mdy
    !monthmatch &&
    all(thirds %in% startyear) &&
    all(firsts %in% startmonth:month(enddate)) &&
    any(seconds>12 | length(seconds)>length(firsts))){
    dtformat <- 'mdy'
  }

  if(is.null(dtformat)){
    #no rule matched: the datetime column is saved as the original text
    alert(" has a date format that could not be determined; timestamps left unparsed. \n")
  }else{
    parser <- switch(dtformat, mdy=mdy_hms, ymd=ymd_hms, dmy=dmy_hms)
    sums$datetime <- parser(as.character(sums$datetime),tz=tzone,truncated=1)
  }

  #check length in days
  #NOTE(review): 144 rows per day presumably assumes one sample every 10 minutes
  days <- nrow(sums)/144

  if(days<3){
    #fewer than 3 days of rows: log it and save a csv copy in "short"
    alert(" is less than 3 days long. \n")
    dir.create(file.path(savepath,'short'),showWarnings=FALSE)
    write.csv(sums,file=file.path(savepath,'short',newfilecsv))
  }else if(all(output=='rds')){
    #3 days or more: save to the normal folder in the requested format(s)
    saveRDS(sums,file=file.path(savepath,newfilerds))
  }else if(all(output=='csv')){
    write.csv(sums,file=file.path(savepath,newfilecsv))
  }else if(all(output %in% c('csv','rds'))){
    saveRDS(sums,file=file.path(savepath,newfilerds))
    write.csv(sums,file=file.path(savepath,newfilecsv))
  }

  invisible(NULL)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment