Skip to content

Instantly share code, notes, and snippets.

@thegorgon
Last active July 22, 2021 20:32
Show Gist options
  • Save thegorgon/5430b4149e51e06ecaab109fadfeec56 to your computer and use it in GitHub Desktop.
Save thegorgon/5430b4149e51e06ecaab109fadfeec56 to your computer and use it in GitHub Desktop.
CSV Chunker in Ruby
#! /usr/bin/env ruby
require 'fileutils'
require 'optparse'
require 'csv'
# csv-chunker: split one CSV file into several smaller CSV files, each holding
# at most SIZE data rows and each starting with the original header row.
#
# Usage: csv-chunker -i INPUT -o OUTPUT_DIR [-n SIZE]
#
# Output files are named "<input basename>--chunk-<index>.csv" (index starts
# at 0) and are written into OUTPUT_DIR, which is created if missing.

# Parses the command-line flags out of +argv+ (consuming the ones it recognizes).
#
# @param argv [Array<String>] raw command-line arguments
# @return [Hash] :input, :output (nil when not given) and :size (default 1000)
def parse_options(argv)
  options = { input: nil, output: nil, size: 1000 }
  OptionParser.new do |parser|
    parser.banner = 'Usage: csv-chunker [options]'
    parser.on('-i', '--input INPUT', 'Read CSV from INPUT') do |input|
      options[:input] = input
    end
    parser.on('-o', '--output OUTPUT', 'Output chunks into directory OUTPUT') do |output|
      options[:output] = output
    end
    parser.on('-n', '--size SIZE', Integer, 'Chunk Size') do |size|
      options[:size] = size
    end
  end.parse!(argv)
  options
end

# Reads +input+ row by row and writes it back out in files of up to +size+ rows.
#
# When the input has no data rows, the output directory is still created but
# no chunk files are written.
#
# @param input [String] path of the CSV file to split (first line is the header)
# @param output_dir [String] directory that receives the chunk files
# @param size [Integer] maximum number of data rows per chunk; must be positive
# @return [void]
def write_chunks(input, output_dir, size)
  # Strip the extension via File.basename rather than a regex so characters
  # such as "." or "+" in the extension are taken literally.
  basename = File.basename(input, File.extname(input))
  FileUtils.mkdir_p(output_dir)

  # CSV.foreach without a block yields rows lazily, so only one chunk's worth
  # of rows is buffered at a time instead of the whole file.
  CSV.foreach(input, headers: true).each_slice(size).with_index do |rows, index|
    path = File.join(output_dir, "#{basename}--chunk-#{index}.csv")
    CSV.open(path, 'wb') do |csv|
      # Each slice is non-empty, so rows.first is always a CSV::Row here.
      csv << rows.first.headers
      rows.each { |row| csv << row }
    end
  end
end

options = parse_options(ARGV)
raise 'Input and output are both required' unless options[:input] && options[:output]
# A zero or negative size cannot produce meaningful chunks; fail with a clear message.
raise 'Chunk size must be a positive integer' unless options[:size].positive?

write_chunks(options[:input], options[:output], options[:size])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment