Created
April 22, 2011 00:29
-
-
Save chengyus/935776 to your computer and use it in GitHub Desktop.
Purpose: A script that parses a Gnotime Activity Journal HTML file into a .csv file; I use it for copying and pasting into a timesheet template to save some typing. Editor used: vim/gvim
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'rubygems' | |
| require 'nokogiri' | |
| require 'fastercsv' | |
| require 'htmlentities' | |
# Command-line usage: ruby <this script> JOURNAL_HTML [START_DATE]
#   JOURNAL_HTML - path to the Gnotime Activity Journal HTML file to parse.
#   START_DATE   - optional; later in the script only dates that compare
#                  >= this string are written out.
#
# ARGV is used instead of $ARGV: $ARGV is only an alias for the argument
# list once the 'English' library has been loaded, and is nil otherwise.
abort "usage: #{$0} JOURNAL_HTML [START_DATE]" if ARGV.empty?

doc = Nokogiri::HTML(File.read(ARGV[0]))
memo = []       # memo array
elapsed = []    # elapsed array
dates = []      # dates array
index = 0       # count of memo entries pushed so far
this_date = ""
# Default to the empty string so that every date string compares >= it
# when no start date is supplied (a nil here would break the comparison).
start_date = ARGV[1] || ""
# Convert an "hh:mm:ss" elapsed string to decimal hours, rounded to two
# decimal places: "01:30:00" -> 1.5, "1:45:00" -> 1.75, "0:40:00" -> 0.67.
# Credits/Ref: http://www.intura.co.uk/?p=23
#
# elapsed - String of colon-separated fields; the first is read as hours and
#           the second as minutes. The seconds field is deliberately ignored.
#           Missing or non-numeric fields count as 0.
#
# Returns 0 for a nil or empty string, otherwise a Float.
def elapsed_to_decimal(elapsed)
  return 0 if elapsed.nil? || elapsed.empty?

  hours, minutes = elapsed.split(':').map { |part| part.to_i }
  total = hours.to_i + minutes.to_i / 60.0
  # Round (rather than truncate) to two decimals so that short intervals are
  # not systematically under-reported when several are summed for one day.
  (total * 100).round / 100.0
end
# Start cells that carry a date contain a four-digit year followed by "-".
# (Previously hard-coded to the year 2011.) Both passes below must use the
# same pattern so that dates, elapsed and memo stay aligned.
date_cell = /\d{4}-/

# Pass 1: walk every <td>, collecting dates and elapsed hours.
# A start cell holding a date opens a new entry; a start cell holding only a
# non-breaking space marks a continuation row, whose elapsed time is added to
# the most recent entry instead of being pushed as a new one.
same_day_elapsed_time = false
doc.css('td').each do |node|
  css_class = node['class']

  if css_class =~ /gnotime-start-stop-table-start$/
    start_text = node.content
    if start_text =~ date_cell
      dates.push(start_text)
      same_day_elapsed_time = false
    elsif start_text == "\302\240" # UTF-8 bytes of a non-breaking space
      same_day_elapsed_time = true
    end
  end

  next unless css_class =~ /(.*)-elapsed$/

  hours = elapsed_to_decimal(node.content)
  if same_day_elapsed_time && !elapsed.empty?
    elapsed[-1] += hours
  else
    # Also taken when a continuation row appears before any entry exists,
    # so there is always something to accumulate into.
    elapsed.push(hours)
  end
end

# Pass 2: walk every <div> whose class ends in "-memo", collecting one memo
# per entry and counting them in index.
coder = HTMLEntities.new
doc.css('div').each do |node|
  next unless node['class'] =~ /(.*)-memo$/

  memo_text = coder.decode(node.children.children.children.to_s)
  memo.push(memo_text)
  index += 1

  # Check the dates in the table after the memo: when the same memo spans
  # several days, duplicate the memo once per additional dated row.
  sibling = node.next_element
  table = sibling && sibling.next_element
  next unless table

  related_tds = table.css('td')
  # Ad-hoc range limit due to my current Gnotime cross-day same-entry
  # situation: only cells 5, 10 and 15 are inspected (presumably the start
  # cell of rows 2-4 at five cells per row -- TODO confirm against the HTML).
  [5, 10, 15].each do |td_index|
    cell = related_tds[td_index]
    break unless cell
    next unless cell.content =~ date_cell

    memo.push(memo_text)
    index += 1
  end
end
# Write the collected entries to timesheet.csv, last-collected entry first.
# Rows run from index - 1 down to 0 so that the first collected entry is
# included and no slot past the end of the arrays is probed.
# Entries without a date, or dated before start_date, are skipped; a nil
# start_date disables the date filter.
FasterCSV.open("timesheet.csv", "w") do |csv|
  (index - 1).downto(0) do |row|
    date = dates[row]
    next unless date
    next unless start_date.nil? || date >= start_date

    csv << [date, elapsed[row], memo[row]]
    csv << [nil, nil, nil] # a blank row for better spacing
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment