Skip to content

Instantly share code, notes, and snippets.

@btihen
Last active August 29, 2015 14:02
Show Gist options
  • Save btihen/f2cef87050bc29a83d2d to your computer and use it in GitHub Desktop.
Save btihen/f2cef87050bc29a83d2d to your computer and use it in GitHub Desktop.
Return Analysis - needs colors, line graph (box plots too)
----------------
# returns analysis
# Start in Bash
# METHOD 1
# Download the inventory export. The URL is quoted so the shell never treats
# the `?` as a glob character.
# NOTE(review): -k disables TLS certificate verification; drop it once the
# server presents a certificate this machine trusts.
curl -k "https://inventory.las.ch/serial.csv?scope=computers" > laptop-inventory.csv

# Field 44 of the ';'-separated export is the return timestamp. Keep the 2014
# entries and strip the surrounding double quotes.
cut -d';' -f44 laptop-inventory.csv | grep 2014 | cut -d'"' -f2 > return-data.csv

# Hourly buckets: truncate "HH:MM:SS" to "HH:00", keep May 2014, drop the
# "2014-05-" prefix, then keep only days 20-29. The final grep is anchored to
# the start of the line; unanchored it would also match hours 20-23 and let
# rows from other days through.
echo "day hour" > return-intensive-by-hour.csv
sed 's/:[0-9][0-9]:[0-9][0-9]$/:00/' return-data.csv | grep '2014-05' | sed 's/2014-05-//' | grep '^2[0-9]' >> return-intensive-by-hour.csv

# Ten-minute buckets: replace the last minute digit and the seconds with "0"
# (e.g. 14:13:22 -> 14:10), then filter exactly as above.
echo "day time" > return-intensive-by-tenmin.csv
sed 's/[0-9]:[0-9][0-9]$/0/' return-data.csv | grep '2014-05' | sed 's/2014-05-//' | grep '^2[0-9]' >> return-intensive-by-tenmin.csv
# switch to r
# vcd supplies structable(); load it once for both analyses below.
library(vcd)

# Run `draw()` with a PDF device open on `path`; the device is closed even if
# the drawing code fails, so no half-written device is left open.
save_pdf <- function(path, draw) {
  pdf(file = path)
  on.exit(dev.off(), add = TRUE)
  invisible(draw())
}

# Bar chart of the number of returns per day. The raw `day` column holds one
# entry per returned computer, so it has to be tabulated first; plotting it
# directly would draw one bar per computer with the day number as its height.
plot_returns_by_day <- function(days) {
  barplot(table(days), main = "Computer Returns by Day",
          xlab = "Day", ylab = "Returned Computers")
}

# Box plot of returns per time slot (one box per slot, spread over the days)
# overlaid with one coloured line per day.
#   by_day  - counts with days as rows and time slots as columns
#   by_slot - the transpose: time slots as rows and days as columns
plot_returns_by_slot <- function(by_day, by_slot, main, xlab) {
  boxplot(by_day, main = main, xlab = xlab, ylab = "Returned Computers")
  # One colour per day (= per column of by_slot). Titles and axis labels
  # belong to the boxplot; matlines() only adds lines to the existing plot.
  day_cols <- seq_len(ncol(by_slot))
  matlines(as.matrix(by_slot), lty = 1, col = day_cols)
  legend("topright", legend = colnames(by_slot), col = day_cols, lty = 1,
         title = "Day")
}

# ---- Returns per hour ----
# Columns: `day` (day of the month) and `hour` ("HH:00").
interest_hr <- read.csv("return-intensive-by-hour.csv", sep = " ")
table(interest_hr)
structable(table(interest_hr))

# The same counts in both orientations (see plot_returns_by_slot).
hour_day <- as.data.frame.matrix(table(interest_hr$hour, interest_hr$day))
day_hour <- as.data.frame.matrix(table(interest_hr$day, interest_hr$hour))

draw_by_day <- function() plot_returns_by_day(interest_hr$day)
draw_by_hour <- function() {
  plot_returns_by_slot(day_hour, hour_day,
                       main = "Returns each hour", xlab = "Hour")
}

save_pdf("./Returns_Each_Day.pdf", draw_by_day)
save_pdf("./Returns_Each_Hour.pdf", draw_by_hour)
# Repeat on the screen device for interactive inspection.
draw_by_hour()
draw_by_day()

# ---- Returns per ten minutes ----
# Columns: `day` (day of the month) and `time` ("HH:M0").
interest_ten <- read.csv("return-intensive-by-tenmin.csv", sep = " ")
table(interest_ten)
structable(table(interest_ten))

time_day_ten <- as.data.frame.matrix(table(interest_ten$time, interest_ten$day))
day_time_ten <- as.data.frame.matrix(table(interest_ten$day, interest_ten$time))

draw_by_ten <- function() {
  plot_returns_by_slot(day_time_ten, time_day_ten,
                       main = "Returns each 10 min", xlab = "Time")
}

save_pdf("./Returns_Each_10mins.pdf", draw_by_ten)
draw_by_ten()
#--------------
# METHOD 2 - start in R
# Earlier revisions of this method were off by a day and by an hour:
#   * round(x, "day") moved every return made after noon to the next day;
#   * the hour labels were built by adding epoch seconds to a CET-local
#     origin, which shifted them by the zone offset, while the quarter-hour
#     labels used UTC.
# Timestamps are now handled as plain wall-clock values that are parsed and
# formatted in one single zone, so no offset can creep in.
library(vcd)

#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into r
returns <- read.csv("./computer-inventory.csv", sep = ";")
#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# Zone used for both parsing and formatting. The value itself is irrelevant
# as long as it is the same on both sides; UTC has no daylight-saving gaps.
wall_tz <- "UTC"

# get return information - separate from all other info
# (as.character() so the code works whether the column was read as factor or
# as character)
ret_fac <- as.character(returns$X44.usr_return_at)
ret_fac <- ret_fac[grepl("2014", ret_fac)]
# convert to date_time data
ret_dt <- as.POSIXct(ret_fac, tz = wall_tz)
# get days of interest; which() drops timestamps that failed to parse (NA)
window_start <- as.POSIXct("2014-05-21", tz = wall_tz)
window_end <- as.POSIXct("2014-05-29", tz = wall_tz)
interest <- ret_dt[which(ret_dt > window_start & ret_dt < window_end)]

# extract the interesting days: the calendar date, not the nearest midnight
just_day <- format(interest, "%Y-%m-%d")
# just the hours, rounded to the nearest full hour.
# NOTE(review): a return at 23:45 is labelled "00:00" but keeps its own date
# in just_day; switch to trunc(interest, "hours") if that matters.
just_hour <- format(round(interest, "hours"), "%H:%M")
interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)
# nearest quarter hour (900 s), formatted in the same zone as the hours
quarter_secs <- round(as.numeric(interest) / 900) * 900
by_quarter_hr <- format(
  as.POSIXct(quarter_secs, origin = "1970-01-01", tz = wall_tz),
  "%H:%M"
)
count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)
pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
dev.off()
pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()
pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")
dev.off()

# the same three charts on the screen device
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")

# `counts` was never defined; the day-by-hour table is the only two-way count
# table built above that matches the intent.
structable(count_by_hr)
#----------
# METHOD 3?
# start in R
#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into r
computers <- read.csv("./computer-inventory.csv", sep = ";")
# get laptops - filter the table that was just loaded (`computers`), not a
# `returns` object that may or may not be left over from an earlier method
laptops <- subset(computers, grepl("laptop", computers$X30.device_type))
# get return time-dates (from 20th to 29th may 2014 -- 2014-05-2x)
returns <- subset(laptops$X44.usr_return_at,
                  grepl("2014-05-2", laptops$X44.usr_return_at))
chars <- as.character(returns)
# strip the year-month prefix (the stray closing parenthesis that made this
# line a syntax error is removed).
# NOTE(review): the result still carries the time of day ("DD HH:MM:SS");
# splitting day and time is unfinished, see the commented-out line below.
day <- gsub("2014-05-", "", chars)
#time = gsub("^2014-05-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]$", )
----------------
# returns analysis
# Counts computer returns per day, per hour and per quarter hour for the
# window 2014-05-28 .. 2014-05-29 and writes one PDF bar chart for each.
# Timestamps are handled as plain wall-clock values that are parsed and
# formatted in one single zone, so labels cannot drift by a zone offset.

#get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into r
returns <- read.csv("./computer-inventory.csv", sep = ";")
#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# Zone used for both parsing and formatting. The value itself is irrelevant
# as long as it is the same on both sides; UTC has no daylight-saving gaps.
wall_tz <- "UTC"

# get return information - separate from all other info
# (as.character() so the code works whether the column was read as factor or
# as character)
ret_fac <- as.character(returns$X44.usr_return_at)
ret_fac <- ret_fac[grepl("2014", ret_fac)]
# convert to date_time data
ret_dt <- as.POSIXct(ret_fac, tz = wall_tz)
# get days of interest; which() drops timestamps that failed to parse (NA)
window_start <- as.POSIXct("2014-05-28", tz = wall_tz)
window_end <- as.POSIXct("2014-05-29", tz = wall_tz)
interest <- ret_dt[which(ret_dt > window_start & ret_dt < window_end)]

# extract the interesting days: the calendar date. round(x, "day") would move
# every return made after noon to the following day.
just_day <- format(interest, "%Y-%m-%d")
# just the hours, rounded to the nearest full hour.
# NOTE(review): a return at 23:45 is labelled "00:00" but keeps its own date
# in just_day; switch to trunc(interest, "hours") if that matters.
just_hour <- format(round(interest, "hours"), "%H:%M")
interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)
# nearest quarter hour (900 s), formatted in the same zone as the hours
quarter_secs <- round(as.numeric(interest) / 900) * 900
by_quarter_hr <- format(
  as.POSIXct(quarter_secs, origin = "1970-01-01", tz = wall_tz),
  "%H:%M"
)
count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)
pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
dev.off()
pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()
pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")
dev.off()

# the same three charts on the screen device
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour",
        xlab = "hour", ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day",
        xlab = "date", ylab = "computers returned")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment