Skip to content

Instantly share code, notes, and snippets.

@aniruddha-a
Created April 28, 2014 05:48
Show Gist options
  • Save aniruddha-a/11362673 to your computer and use it in GitHub Desktop.
Save aniruddha-a/11362673 to your computer and use it in GitHub Desktop.
Match CSV files based on a field (name) and merge them
#!/usr/bin/perl
use Parse::CSV;
use strict;
my %wb2_table = ();
my $workbook2_data = Parse::CSV->new ( file => 'merged_workbook2.csv' );
# Note: the header lines are removed in the CSV files for simplicity
# Parse one workbook and load it to a hash
while (my $ref = $workbook2_data->fetch) {
my @line = @$ref;
my $name = $line[5]; # 5th field is the name here
$name =~ s/[.,]/ /g; # cleanup
next if $name =~ /^$/;
my @nsplits = split(/ /, lc($name)); # lowecase
my $cn = join(' ', sort @nsplits); # canonicalize (sort n join)
$cn =~ s/^\s+|\s+$//g; # trim
$wb2_table{$cn} = $ref;
}
# Now load the second workbook and match in the hash
my $workbook1_data = Parse::CSV->new( file => ($ARGV[0] or 'merged_workbook1.csv') );
while (my $ref = $workbook1_data->fetch ) {
my @line = @$ref;
my $name = $line[1]; # field 1 is the name here
$name =~ s/[.,]/ /g; # cleanup
my @nsplits = split(/ /, lc($name)); # lowercase it and split
my $cn = join (' ', sort @nsplits); # canonicalize (sort n join)
$cn =~ s/^\s+|\s+$//g; # trim
if (exists($wb2_table{$cn})) { # try matching in the hash
my $ar = $wb2_table{$cn};
my @line_wb2 = @$ar;
$" = '","'; # Ensure quoting is correct
print "\"@line\",\""; # the current CSV row
print "\"@line_wb2\",\""; # CSV row of the earlier workbook
print "\"\n";
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment