Skip to content

Instantly share code, notes, and snippets.

@chengyus
Created April 22, 2011 00:29
Show Gist options
  • Save chengyus/935776 to your computer and use it in GitHub Desktop.
Save chengyus/935776 to your computer and use it in GitHub Desktop.
Purpose: A Ruby script that parses a GnoTime Activity Journal HTML export into a .csv file; I use it for copying/pasting into a timesheet template to save some typing. Editor used: vim/gvim
require 'rubygems'
require 'nokogiri'
require 'fastercsv'
require 'htmlentities'
# Usage: ruby <this script> JOURNAL_HTML [START_DATE]
#
# JOURNAL_HTML is the Gnotime Activity Journal saved as HTML.
# START_DATE (optional) is compared as a string against each journal date
# when the CSV is written; only rows whose date sorts at or after it are kept.
#
# ARGV is used instead of $ARGV: the latter is only an alias that exists once
# the "English" library has been loaded, so relying on it is fragile.
abort "Usage: #{$0} JOURNAL_HTML [START_DATE]" if ARGV.empty?

doc = Nokogiri::HTML(File.read(ARGV[0]))
memo = []      # decoded memo text, one entry per exported row
elapsed = []   # decimal hours, one entry per dated row
dates = []     # date text taken from the journal's start cells
index = 0      # number of memo entries collected so far
this_date = "" # scratch variable for the start cell currently being inspected
start_date = ARGV[1]
# Convert an "hh:mm:ss" elapsed-time string to decimal hours, rounded to the
# nearest hundredth: "01:30:00" => 1.5, "1:45:00" => 1.75, "0:10:00" => 0.17.
# Credits/Ref: http://www.intura.co.uk/?p=23
#
# elapsed - String such as "01:30:00"; nil or blank text is treated as no time.
#
# Returns 0 for nil/blank input, otherwise a Float number of hours.
# The seconds field is deliberately ignored. Text without a ":" is read as a
# whole number of hours instead of raising.
def elapsed_to_decimal(elapsed)
  text = elapsed.to_s.strip
  return 0 if text.empty?

  fields = text.split(':')
  hours = fields[0].to_i
  minutes = fields[1].to_i # nil.to_i is 0 when there is no minutes field

  # Work in whole hundredths of an hour so the minutes fraction is rounded
  # (10 minutes => 17 hundredths) rather than truncated by integer division.
  (hours * 100 + (minutes * 100 / 60.0).round) / 100.0
end
# A start cell carries either a date or a lone non-breaking space. Dates are
# recognised by a four-digit year followed by a dash, so journals from any
# year are handled (previously only "2011-" matched). Both passes below must
# use the same pattern, otherwise `dates` and `memo` drift out of step.
date_pattern = /\d{4}-/

# Pass 1: walk every <td>, collecting one date and one decimal-hours total
# per dated row. Intervals whose start cell is blank are added to the total
# of the most recent dated row.
same_day_elapsed_time = false
doc.css('td').each do |node|
  css_class = node['class']

  if css_class =~ /gnotime-start-stop-table-start$/
    this_date = node.content
    if this_date =~ date_pattern
      dates.push(this_date)
      same_day_elapsed_time = false
    elsif this_date == "\302\240" # UTF-8 bytes of a non-breaking space (&nbsp;)
      same_day_elapsed_time = true
    end
  end

  if css_class =~ /(.*)-elapsed$/
    hours = elapsed_to_decimal(node.content)
    if same_day_elapsed_time
      # Same day as the previous interval: fold into the running total.
      elapsed.push(elapsed.pop + hours)
    else
      elapsed.push(hours)
    end
  end
end

# Pass 2: walk every <div> whose class ends in "-memo" and record its decoded
# text. `index` counts how many memo entries have been pushed.
coder = HTMLEntities.new
doc.css('div').each do |node|
  next unless node['class'] =~ /(.*)-memo$/

  memo_text = coder.decode(node.children.children.children.to_s)
  memo.push(memo_text)
  index += 1

  # The element two siblings after the memo holds the related <td> cells.
  # Cells 5, 10 and 15 are inspected; each one holding a date means the entry
  # continues on another day, so the memo is repeated for that day.
  # Ad-hoc range limit due to my current Gnotime cross-day same-entry situation.
  related_tds = node.next_element.next_element.css('td')
  [5, 10, 15].each do |td_index|
    cell = related_tds[td_index]
    break unless cell # ran out of cells

    if cell.content =~ date_pattern
      memo.push(memo_text)
      index += 1
    end
  end
end
# Write timesheet.csv in reverse collection order: one row of
# [date, decimal hours, memo] per entry, each followed by a blank spacer row.
#
# Entries are numbered 0..index-1. The previous loop used `index - i`, which
# read one slot past the end and never emitted entry 0; iterate from the last
# valid position down to 0 instead.
#
# Rows are kept only when their date text compares >= start_date as a string.
# When no start date was supplied (nil), every dated row is written rather
# than raising on the String/nil comparison.
FasterCSV.open("timesheet.csv", "w") do |csv|
  #csv << ["Date", "Hrs", "Description"]
  (index - 1).downto(0) do |entry_index|
    entry_date = dates[entry_index]
    next unless entry_date
    next if start_date && entry_date < start_date

    csv << [entry_date, elapsed[entry_index], memo[entry_index]]
    csv << [nil, nil, nil] # a blank row for better spacing
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment