Skip to content

Instantly share code, notes, and snippets.

@roneyfraga
Last active September 7, 2015 01:32
Show Gist options
  • Save roneyfraga/cd7656fc67e6e78ba573 to your computer and use it in GitHub Desktop.
Save roneyfraga/cd7656fc67e6e78ba573 to your computer and use it in GitHub Desktop.
#!/usr/bin/env Rscript
# to run this code do:
# chmod +x process-analysis.R
# ./process-analysis.R
# set work directory
# Move into the `xmls` folder, which holds the files analysed below.
# The check requires a directory named exactly "xmls": a pattern match on the
# listing would also accept unrelated entries (e.g. "xmls.zip") and then fail
# later inside setwd() with a less helpful error.
if (!dir.exists("xmls")) {
  stop("The xmls folder does not exist.")
}
setwd("xmls")
# Collect name, size and modification time for every entry in a directory.
#
# Uses list.files() + file.info() instead of parsing `ls` output, so it
# behaves the same on macOS, Linux and Windows, copes with file names that
# contain spaces, and does not depend on the locale's month abbreviations.
#
# Args:
#   path: directory to inspect (defaults to the current working directory).
#
# Returns:
#   A data.frame sorted by modification time (oldest first) with columns
#     name    - file name (character)
#     size    - file size in bytes (numeric)
#     date    - modification time (POSIXct)
#     ncharId - number of characters in the file name (integer)
#   An empty directory yields a data.frame with zero rows.
collect_file_info <- function(path = ".") {
  fnames <- list.files(path)
  info <- file.info(file.path(path, fnames))
  out <- data.frame(
    name = fnames,
    size = info$size,
    date = info$mtime,
    stringsAsFactors = FALSE
  )
  out$ncharId <- nchar(out$name)
  out <- out[order(out$date), , drop = FALSE]
  rownames(out) <- NULL
  out
}

# list of files, ordered by modification time
df <- collect_file_info()
# id length: how many file names there are of each length
tncharId <- table(df$ncharId)
# average file size (bytes)
avefile <- mean(df$size)
# ---- Report ----
# Print a summary of the download run: elapsed time and throughput, number of
# files, number of (apparently) repeated file names and the average file size.
# Relies on `df` being sorted by its `date` column (oldest first).
nfiles <- nrow(df)

# Elapsed time between the oldest and the newest file. Computed once in
# seconds and converted, rather than calling difftime() once per unit.
# With no files there is no interval to measure, so report NA.
tsec <- if (nfiles > 0) {
  as.numeric(difftime(df[nfiles, "date"], df[1, "date"], units = "secs"))
} else {
  NA_real_
}
tmin <- tsec / 60
thou <- tsec / 3600

# Throughput for a given elapsed time; NA when the interval is zero or
# unknown (e.g. a single file), instead of printing Inf.
files_per <- function(elapsed) {
  if (isTRUE(elapsed > 0)) nfiles / elapsed else NA_real_
}

# The second entry of the name-length table is used as the count of repeated
# names (presumably repeated downloads get a longer name - TODO confirm).
# When every name has the same length there is no second entry: report 0
# rather than NA.
nrepeated <- if (length(tncharId) >= 2) tncharId[[2]] else 0

cat("Performance:", tsec, "secs for download", nfiles, "==",
    files_per(tsec), "files per second", "\n")
cat("Performance:", tmin, "min for download", nfiles, "==",
    files_per(tmin), "files per minute", "\n")
cat("Performance:", thou, "hours for download", nfiles, "==",
    files_per(thou), "files per hour", "\n")
cat("Files downloaded:", nfiles, "\n")
cat("Files names repeated:", nrepeated, "\n")
# `avefile` is a mean of sizes in bytes; convert so the value matches the
# "kbyte" label.
cat("Average file size:", avefile / 1024, "kbyte", "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment