Last active
August 29, 2015 14:07
-
-
Save bradland/3679a022d6699e3541f8 to your computer and use it in GitHub Desktop.
Boilerplate CSV analysis script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'csv' | |
require 'ostruct' | |
require 'optparse' | |
require 'i18n' | |
require 'set' # this should only be required until i18n updates to new rev | |
require 'active_support/core_ext/string/inflections' | |
I18n.enforce_available_locales = true # I18n spams warn unless this is set | |
# Boilerplate command-line CSV analysis tool.
#
# Reads CSV text from the files named in the argument list (or from standard
# input when no file is named), converts every record into a Hash keyed by
# normalized header symbols, runs a per-row analysis step and prints the
# result to standard output as TSV (the default) or CSV.
class CSVAnalyze
  # Deliberately defined at the top level (not namespaced under CSVAnalyze);
  # this is the value printed by --version.
  ::Version = [0, 0, 1].freeze

  # OpenStruct of runtime settings; `format` is either :tsv or :csv.
  attr_accessor :options

  # $stdout.sync = true # prevents buffering of output; uncomment this only if you need it.
  Signal.trap(:INT, :EXIT) # Trap ctrl+c from users or `kill -SIGINT pid` from shell and exits from pipe chains

  # Parses command-line switches and records the remaining file arguments.
  #
  # @param args [Array<String>] command-line arguments (normally ARGV).
  #   Recognized switches are removed from this array in place; whatever is
  #   left is treated as the list of input files by #run!.
  # @param options [Hash] optional setting overrides applied on top of the
  #   defaults before the command line is parsed (e.g. `format: :csv`).
  #
  # Exits with status 0 after printing help or the version, and with status
  # 64 (EX_USAGE) when the command line cannot be parsed.
  def initialize(args, options = {})
    @args = args
    @options = OpenStruct.new({ format: :tsv }.merge(options))

    opt_parser = OptionParser.new do |opt|
      opt.banner = "Usage: #{$0} [OPTION]... [FILE]..."

      opt.on("-c", "--csv", "Output CSV format instead of standard TSV") do
        @options.format = :csv
      end

      # Asking for help or the version is a successful request, so both
      # print to stdout and exit with status 0.
      opt.on_tail("-h", "--help", "Print usage information.") do
        puts opt
        exit
      end

      opt.on_tail("--version", "Show version") do
        puts ::Version.join('.')
        exit
      end
    end

    begin
      # Parse the arguments that were handed in rather than the global ARGV.
      opt_parser.parse!(@args)
    rescue OptionParser::ParseError => e
      $stderr.puts "Specified #{e}"
      $stderr.puts opt_parser
      exit 64 # EX_USAGE
    end
  end

  # Reads the input, analyzes every row and prints the report to stdout.
  #
  # The first CSV record is taken as the header row. Nothing is printed for
  # completely empty input; a header-only input prints just the header line.
  #
  # @return [void]
  def run!
    # Slurps the entire input (refactor to a streaming form if working with
    # large files!). A private ARGF-style stream is built over the remaining
    # arguments so that the files read always match the args that were parsed;
    # with no file arguments it reads standard input.
    input = ARGF.class.new(*@args).read.to_s

    # BEGIN data pre-processing
    # Parsing the text as a whole (not line by line) keeps quoted fields that
    # contain line breaks intact.
    records = CSV.parse(input)

    # Extract headers from the first record
    header_row = records.shift
    return if header_row.nil? # empty input: nothing to report

    # to_s guards against blank header cells, which CSV yields as nil
    headers = header_row.map { |col| col.to_s.parameterize.underscore.to_sym }

    # Convert the remaining records to hash rows
    data = records.map { |row| headers.zip(row).to_h }
    # END data pre-processing

    # BEGIN data analysis
    summaries = data.each_with_index.map do |row, _index|
      # This is where you do the work.
      row[:some_column] = "Some computed value goes here."
      row
    end
    # END data analysis

    # BEGIN report to stdout
    # With no data rows there is no analyzed row to take column names from,
    # so fall back to the parsed headers.
    columns = summaries.empty? ? headers : summaries.first.keys

    case @options.format
    when :tsv
      puts columns.join("\t")
      summaries.each { |row| puts row.values.join("\t") }
    when :csv
      puts columns.to_csv
      summaries.each { |row| puts row.values.to_csv }
    end
    # END report to stdout
  end
end
# Script entry point: only runs when this file is executed directly, not when
# it is loaded by another program.
if __FILE__ == $PROGRAM_NAME
  begin
    analyzer = CSVAnalyze.new(ARGV)
    analyzer.run!
  rescue Interrupt
    # Ctrl^C
    exit 130
  rescue Errno::EPIPE
    # STDOUT was closed
    exit 74 # EX_IOERR
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment