Created
June 15, 2020 22:23
-
-
Save danlynn/13192ae6cf9328e7b0c7b62e2c1c0ba1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
merged_file_path = '/Users/danlynn/Documents/temp/merge-csvs/ahoy_report-20200607-20200613-merge.csv' | |
file_paths = [ | |
'/Users/danlynn/Documents/temp/merge-csvs/ahoy_report-20200607-20200613-interim.csv', | |
'/Users/danlynn/Documents/temp/merge-csvs/ahoy_report-20200607-20200613-legacy.csv', | |
'/Users/danlynn/Documents/temp/merge-csvs/ahoy_report-20200607-20200613-new.csv' | |
] | |
# Merge column titles from each file so that we can start | |
# writing a new file that includes all titles. | |
merged_col_titles = {} | |
file_paths.each do |file_path| | |
begin | |
file = File.open(file_path) | |
line = file.readline | |
ensure | |
file.close | |
end | |
cols_in_file = 0 | |
unique_col_title_count = 0 | |
fields = CSV.parse(line)[0] | |
fields.each do |col_title| | |
cols_in_file += 1 | |
unless merged_col_titles[col_title] | |
merged_col_titles[col_title] = merged_col_titles.size | |
unique_col_title_count += 1 | |
end | |
end | |
puts "#{File.basename(file_path)}:" | |
puts " cols in file: #{cols_in_file}" | |
puts " new unique col titles: #{unique_col_title_count}" | |
puts " total unique col titles: #{merged_col_titles.size}" | |
end | |
puts "\nmerged col titles:" | |
merged_col_titles.each do |col_title, col_idx| | |
puts " #{"%3.3s" % col_idx}: #{col_title}" | |
end | |
read_count = 0 | |
combined_count = 0 | |
written_count = 0 | |
merged_rows = {} | |
file_paths.each do |file_path| | |
puts "\n=== file: #{File.basename(file_path)}" | |
col_titles = nil | |
CSV.foreach(file_path, 'r') do |row| | |
unless col_titles | |
col_titles ||= row | |
else | |
read_count += 1 | |
merged_row = nil | |
row.each_with_index do |cell, col_idx| | |
if col_idx == 0 | |
unless merged_rows[row[0]] | |
merged_rows[row[0]] ||= Array.new(merged_col_titles.size) | |
merged_rows[row[0]][0] = row[0] | |
else | |
combined_count += 1 | |
end | |
merged_row = merged_rows[row[0]] | |
puts " row: #{row[0]}" | |
else | |
col_mapped_idx = merged_col_titles[col_titles[col_idx]] | |
# if col_titles[col_idx] == 'Ahoy!wich Ice Cream Sandwiches' | |
# puts "test: map #{col_idx} -> #{col_mapped_idx}: #{cell}" | |
# end | |
# puts "--- map: #{col_titles[col_idx]}: #{col_idx} -> #{col_mapped_idx}" | |
merged_row[col_mapped_idx] ||= 0 | |
merged_row[col_mapped_idx] += cell.to_i | |
end | |
end | |
end | |
end | |
end | |
CSV.open(merged_file_path, 'w') do |merged_csv| | |
merged_csv << merged_col_titles.keys | |
sorted_merged_rows = merged_rows.values.sort_by{|row| row[0]} | |
sorted_merged_rows.each do |merged_row| | |
written_count += 1 | |
merged_csv << merged_row | |
end | |
end | |
puts "\ncomplete:" | |
puts " read: #{read_count}" | |
puts " combined: #{combined_count}" | |
puts " written: #{written_count}" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment