Skip to content

Instantly share code, notes, and snippets.

@ajaypillarisetti
Created August 19, 2013 23:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajaypillarisetti/6275303 to your computer and use it in GitHub Desktop.
Save ajaypillarisetti/6275303 to your computer and use it in GitHub Desktop.
A routine to clean single iButton files. Focuses mainly on iButtons used as Stove Use Monitors (SUMS), but may be otherwise useful. Tested with 1921G, 1922T, and 1922L. 1922E testing ongoing. Does not properly process Hygrochron iButtons at this time. Work in progress!
require(plyr)
require(lubridate)
#############################################################
# this is a combined routine to
# 1. resolve different date-time formats between files
# 2. convert F to C
# 3. export as RDS, CSV, or both (TSDB forthcoming)
# 4. create and update an error log
# It can toss all values in 1 file after a negative temp;
# that option is flaggable in the function.
# It also assesses file length, and moves short files
# to a separate "short" directory
#############################################################
#TO DO add TSDB out
#TO DO use first sample timestamp from SUMS header to determine dt format for short files
#default values for subroutines are specified in the initial function parameters
#can easily be changed; for example:
#sums.cleaner(file, tossneg=TRUE, tzone="Asia/Shanghai", savepath="~/Desktop/Study/Folder",output="csv")
# Clean a single iButton (SUMS) export file and save the cleaned readings.
#
# Steps, in order:
#   1. open a per-hour error log in `savepath` (console output is sunk to it)
#   2. read the device header (first 21 lines) and the readings that follow
#      the "Value" header row
#   3. convert Fahrenheit readings to Celsius
#   4. optionally drop the first negative reading and everything after it
#   5. infer the date field order (mdy / ymd / dmy) by comparing the readings
#      with the mission start date recorded in the header, then parse datetime
#   6. write the result as csv and/or rds; short files go to `savepath/short`
#
# Arguments:
#   file      path to the raw iButton csv export
#   tossneg   if TRUE, discard the first negative temperature and all rows
#             after it
#   tzone     time zone handed to the lubridate parsers
#   savepath  output directory (created if missing); also holds the error log
#   output    'csv', 'rds', or both
#
# Returns NULL invisibly; the function is called for its side effects.
sums.cleaner <- function(file, tossneg=FALSE, tzone="Asia/Kolkata", savepath=getwd(),output=c('csv','rds')){
  # make sure the output directory exists
  if (!file.exists(savepath)) {
    dir.create(savepath, showWarnings = FALSE, recursive = TRUE)
  }

  # one error log per hour of run time: errorlog.YYYY_MM_DD_HH.txt
  currentdt <- format(Sys.time(), "%Y_%m_%d_%H")
  logloc <- file.path(savepath, paste0("errorlog.", currentdt, ".txt"))

  # divert console output to the log; on.exit guarantees the sink is closed
  # even when a later step fails, so the session is never left redirected
  sink(logloc, append = TRUE)
  on.exit(sink(), add = TRUE)

  # write one alert line to the (sunk) log with a human-readable timestamp
  log_alert <- function(msg) {
    cat(format(Sys.time()), " ALERT: ", file, msg)
  }

  # get device type from SUMS header; clean/format
  notes <- head(read.csv(file, header = FALSE), 21)[, 1]
  notes <- as.character(notes[1:21])
  device <- substring(strsplit(notes[1], ":")[[1]][2], 2, 9)
  if (device == "DS1921G-") {
    device <- "DS1921G"
  }
  if (device == "DS1922/D") {
    device <- "DS1922X"
  }

  # output file names: <input name without its 4-character extension>_<device>
  # built up front because the short-file branches below need them
  filename <- paste(substr(file, 1, nchar(file) - 4), device, sep = "_")
  newfilerds <- paste(filename, "rds", sep = ".")
  newfilecsv <- paste(filename, "csv", sep = ".")

  # find line at which to begin import
  startimport <- grep("Value", notes)
  if (length(startimport) == 0) {
    stop("no 'Value' header row found in the first 21 lines of ", file,
         call. = FALSE)
  }

  # import SUMs file
  sums <- read.csv(file, skip = startimport[1], header = FALSE,
                   stringsAsFactors = FALSE)
  sums$device <- device
  sums$sums_serial_no <- substring(notes[2], nchar(notes[2]) - 15,
                                   nchar(notes[2]))
  names(sums)[1:5] <- c("datetime", "unit", "temp", "sumstype", "serial")

  # correct for fahrenheit files
  # read.csv's type conversion turns a column consisting only of "F" into
  # logical FALSE, so a Fahrenheit file shows up as "FALSE" here; a plain
  # "F" is accepted as well
  tempunit <- unique(as.character(sums$unit))
  is_fahrenheit <- length(tempunit) == 1 && tempunit %in% c("FALSE", "F")
  if (is_fahrenheit) {
    sums$temp <- round((5 / 9) * (sums$temp - 32), 3)
    # NOTE(review): temp is now in Celsius but the unit label is restored to
    # "F" exactly as before -- confirm whether downstream code expects "C"
    sums$unit <- "F"
    log_alert(" is in Fahrenheit \n")
  }

  # toss everything from the first negative value onward (optional)
  firstneg <- which(sums$temp < 0)
  if (tossneg && length(firstneg) > 0) {
    # seq_len() yields zero rows when the very first reading is negative;
    # 1:(n - 1) would have kept row 1 in that case
    sums <- sums[seq_len(min(firstneg) - 1), ]
    log_alert(" contains negative values. \n")
  }
  if (nrow(sums) == 0) {
    stop("no readings left to process in ", file, call. = FALSE)
  }

  # extract the date part of the first timestamp of actual data
  fulldate <- strsplit(sums[1, 1], " ")[[1]][1]

  # determine the date delimiter ('-' takes precedence when both occur)
  if (grepl("-", fulldate, fixed = TRUE)) {
    delimiter <- "-"
  } else if (grepl("/", fulldate, fixed = TRUE)) {
    delimiter <- "/"
  } else {
    stop("cannot find a '/' or '-' date delimiter in '", fulldate, "'",
         call. = FALSE)
  }

  # separate date into three components (unique values of each position)
  dateparts <- strsplit(sums$datetime, delimiter, fixed = TRUE)
  firsts <- as.numeric(unique(vapply(dateparts, `[[`, character(1), 1)))
  seconds <- as.numeric(unique(vapply(dateparts, `[[`, character(1), 2)))
  thirds <- unique(vapply(dateparts, `[[`, character(1), 3))
  # the third field still carries the time; keep only what precedes the space
  thirds <- unique(as.numeric(vapply(strsplit(thirds, " "), `[[`,
                                     character(1), 1)))

  # for the rare 2012 or 2013 in a SUMs file, reduce to two digit year so it
  # can be compared with the two digit startyear below
  two_digit_year <- function(x) {
    if (all(nchar(x) == 4)) as.numeric(substring(x, 3, 4)) else x
  }
  thirds <- two_digit_year(thirds)
  # a four digit first field can only be a year (months/days are <= 2 digits)
  firsts <- two_digit_year(firsts)

  # figure out sample duration in days
  numsamples <- as.numeric(
    strsplit(notes[grep("Mission Samples", notes)], ":")[[1]][2]
  )
  sampleinterval <- strsplit(notes[grep("Sample Rate", notes)], ":")[[1]][2]
  # some of the sums record 10 minute intervals; others say 600 seconds
  # catch the minutes and convert to seconds
  samplescale <- grepl("minute", sampleinterval)
  sampleinterval <- as.numeric(strsplit(sampleinterval, " ")[[1]][4])
  if (samplescale) {
    sampleinterval <- sampleinterval * 60
  }
  sampletimesec <- numsamples * sampleinterval
  sampletime <- sampletimesec / (60 * 60 * 24)

  if (sampletime < 2) {
    log_alert(" is less than 2 days long. \n")
    dir.create(file.path(savepath, "short"), showWarnings = FALSE)
    saveRDS(sums, file = file.path(savepath, "short", newfilerds))
  }

  # extract startdate from SUMS header - this is recorded by the device.
  startdate <- strsplit(notes[grep("Mission Start", notes)], ": ")[[1]][2]
  reform <- strsplit(startdate, " ")[[1]]
  # reassemble as "<year> <month> <day> <time>" from header fields 6, 2, 3, 4
  startdate <- ymd_hms(paste(reform[6], reform[2], reform[3], reform[4]))
  enddate <- startdate + sampletimesec
  monthmatch <- month(startdate) == month(enddate)

  # month and two digit year of the first sample as logged in the header
  startmonth <- month(startdate)
  startyear <- as.numeric(substr(year(startdate), 3, 4))
  monthspan <- startmonth:month(enddate)

  n_firsts <- length(unique(firsts))
  n_seconds <- length(unique(seconds))
  n_thirds <- length(unique(thirds))

  # Work out which position holds month / day / year.
  # Within a single month the month and year positions each have exactly as
  # many distinct values as one another while the day position differs;
  # across months the header's start/end months and values > 12 are used.
  date_format <- NULL
  if (monthmatch && n_firsts == n_thirds && n_seconds != n_firsts &&
        all(thirds == startyear)) {
    # 1. multiday sample within a month, year last, day in the middle
    date_format <- "mdy"
  } else if (monthmatch && n_firsts == n_seconds && n_firsts != n_thirds &&
               all(firsts == startyear)) {
    date_format <- "ymd"
  } else if (monthmatch && n_seconds == n_thirds && n_firsts != n_thirds &&
               all(thirds == startyear)) {
    date_format <- "dmy"
  } else if (!monthmatch && any(seconds > 12) &&
               all(firsts %in% c(12, 1)) &&
               all(c(startyear, startyear + 1) %in% thirds)) {
    # 2. multiday sample across a new year: months are Dec/Jan only and both
    # years appear in the third position
    date_format <- "mdy"
  } else if (!monthmatch && all(firsts %in% startyear) &&
               all(seconds %in% monthspan) &&
               any(thirds > 12 | length(thirds) > length(seconds))) {
    # 3. multiday sample spanning several months, year first
    date_format <- "ymd"
  } else if (!monthmatch && all(thirds %in% startyear) &&
               all(seconds %in% monthspan) &&
               any(firsts > 12 | length(firsts) > length(seconds))) {
    # spanning several months, day first
    date_format <- "dmy"
  } else if (!monthmatch && all(thirds %in% startyear) &&
               all(firsts %in% monthspan) &&
               any(seconds > 12 | length(seconds) > length(firsts))) {
    # spanning several months, month first
    date_format <- "mdy"
  }

  if (is.null(date_format)) {
    # as before, datetime stays as text when no rule matches; now it is logged
    log_alert(" has an unrecognised date format; datetime left as text. \n")
  } else {
    parse_datetime <- switch(date_format,
                             mdy = mdy_hms,
                             ymd = ymd_hms,
                             dmy = dmy_hms)
    sums$datetime <- parse_datetime(as.character(sums[, 1]), tz = tzone,
                                    truncated = 1)
  }

  # check length in days
  # NOTE(review): 144 rows per day presumes a 10 minute sample interval --
  # confirm whether the header's sample rate should be used instead
  days <- nrow(sums) / 144

  if (days < 3) {
    log_alert(" is less than 3 days long. \n")
    # short files are written as csv to a separate "short" directory
    dir.create(file.path(savepath, "short"), showWarnings = FALSE)
    write.csv(sums, file = file.path(savepath, "short", newfilecsv))
  } else if (all(output == "rds")) {
    # 3 days or longer: save to the normal folder in the requested format(s)
    saveRDS(sums, file = file.path(savepath, newfilerds))
  } else if (all(output == "csv")) {
    write.csv(sums, file = file.path(savepath, newfilecsv))
  } else if (all(output %in% c("csv", "rds"))) {
    saveRDS(sums, file = file.path(savepath, newfilerds))
    write.csv(sums, file = file.path(savepath, newfilecsv))
  }

  # the sink is closed by on.exit
  invisible(NULL)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment