Skip to content

Instantly share code, notes, and snippets.

@ag4ve
Created January 13, 2016 17:08
Show Gist options
  • Save ag4ve/6a3b2b5313a367cc3356 to your computer and use it in GitHub Desktop.
Save ag4ve/6a3b2b5313a367cc3356 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Text::CSV;
# Shared CSV parser/writer used for every file this script touches.
my $csv = Text::CSV->new({binary => 1, always_quote => 1})
    or die "Can not use CSV: " . Text::CSV->error_diag();

# Usage: script LOOKUP.csv [OUT.csv [RESULT.csv ...]]
#
# LOOKUP.csv - must start with a header row; the hash is expected to be
#              the first field of every row.
# OUT.csv    - file to write, defaults to 'out.csv'.
my $lookup = $ARGV[0] or die "Must specify a lookup csv.\n";
my $fileout = $ARGV[1] // 'out.csv';

# Result files: the score is expected as the first field and the hash as
# the second; a header row is expected.
# When no result files are named, every *.csv in the current directory is
# used - except the lookup file and the output file.  Without the second
# exclusion, a previous run's output would be read back in as one more
# voter.  (Names are compared literally, so './out.csv' would not match
# the 'out.csv' returned by glob.)
my @resultsin = scalar(@ARGV) > 2
    ? @ARGV[2 .. $#ARGV]
    : grep { $_ ne $lookup and $_ ne $fileout } glob "*.csv";

my ($lookdatain, $header) = get_data($lookup);

# Add headers: the average goes in front, the per-voter scores at the end
unshift(@$header, "Score");
push(@$header, "Individual scores");

# Put lookup data into a hash: hash => [remaining fields of that row]
my $lookdata;
foreach my $line (@$lookdatain) {
    my $hash = shift(@$line);
    # Blank lines and rows without a hash cannot be looked up - skip them
    # rather than creating an undef/empty key.
    next if (not defined($hash) or $hash eq '');
    $lookdata->{$hash} = $line;
}
# Get scores
# Builds $data as: hash => [score0, score1, ... scoreN], one score per
# result file that mentions the hash.
my $data;
foreach my $resultin (@resultsin) {
    my ($resultdata) = get_data($resultin, 1);
    # If the CSV parser produced no row with a numeric first field, fall
    # back to the line-based parser.
    if (not grep {defined($_->[0]) and $_->[0] =~ /^[0-9\.-]+$/}
            @{ $resultdata // [] }) {
        ($resultdata) = not_csv($resultin);
    }
    foreach my $row (@{ $resultdata // [] }) {
        my ($score, $hash) = @$row;
        # Short or blank rows carry no usable vote
        next if (not defined($score) or not defined($hash));
        # Store the absolute value: drop the minus sign
        my $num = $score =~ s/-//r;
        # Only keep values made up solely of digits, '.' and '-' that
        # contain at least one digit.  The header row is not stripped by
        # get_data here, so this check is also what filters it out.
        next if ($num !~ /^[0-9\.-]+$/ or $num !~ /[0-9]/);
        push @{ $data->{$hash} }, $num;
    }
}
# Generate a hash of hash key => score average
my $order;
foreach my $hash (keys %$data) {
    my @scores = @{ $data->{$hash} // [] };
    # Number of voters for individual.  This must be the element count,
    # not the last index ($#array is count - 1): dividing by the last
    # index skews every average and divides by zero for a single voter.
    my $resultnum = scalar(@scores);
    # Get the sum of all scores
    my $sum = 0;
    foreach my $score (@scores) {
        # Max score of 10
        $sum += ($score > 10 ? 10 : $score);
    }
    # Generate a lookup with average score (0 when there are no scores)
    $order->{$hash} = ($resultnum ? $sum / $resultnum : 0);
}
# Write result file
# One row per scored hash, highest average first:
#   average score, hash, lookup fields..., individual scores...
{
    $csv->eol("\r\n");
    open (my $fh, '>', $fileout)
        or die "Can not write [" . $fileout . "] $!";
    # Write header
    $csv->print($fh, $header)
        or die "Can not write [" . $fileout . "] " . $csv->error_diag();
    # Number of lookup columns between the hash and the individual scores:
    # the header minus "Score", the hash column and "Individual scores".
    my $idcols = scalar(@$header) - 3;
    # Write output data
    foreach my $hash (sort {$order->{$b} <=> $order->{$a}} keys %$order) {
        # A scored hash may be missing from the lookup file.  Dereferencing
        # the missing entry would die under strict, so emit empty cells
        # instead; that also keeps the score columns aligned.
        my @ident = $lookdata->{$hash}
            ? @{ $lookdata->{$hash} }
            : ('') x $idcols;
        $csv->print($fh, [
            $order->{$hash},            # average score
            $hash,                      # hash
            @ident,                     # identification information
            @{ $data->{$hash} // [] },  # individual voters' scores
        ]) or die "Can not write [" . $fileout . "] " . $csv->error_diag();
    }
    # Buffered write errors only surface at close
    close $fh
        or die "Can not close [" . $fileout . "] $!";
}
# Line-based fallback reader for result files.
#
# Each line has its double quotes removed and is split on commas.  A line
# is kept only when:
#   - it has at least two fields,
#   - the first field (the score) consists solely of digits, '.' and '-',
#   - the second field, once every character outside [a-zA-Z0-9+/] has
#     been removed, is exactly 22 characters long (presumably an unpadded
#     base64 digest - confirm against the data producer).
#
# Parameters:
#   $file - path of the file to read
# Returns:
#   an array reference of [score, hash] pairs; an empty array reference
#   (never undef) when no line qualifies, so callers can dereference the
#   result unconditionally.
# Dies when the file cannot be opened.
sub not_csv {
    my ($file) = @_;
    open (my $fh, '<', $file)
        or die "Can not open [$file] $!";
    my $ret = [];
    while (my $line = <$fh>) {
        chomp $line;
        my @cols = split(',', ($line =~ s/"//gr));
        next if (scalar(@cols) < 2 or
            $cols[0] !~ /^[0-9\.-]+$/);
        # Strip padding, stray carriage returns and any other noise
        $cols[1] =~ s/[^a-zA-Z0-9+\/]//g;
        next if (length($cols[1]) != 22);
        push @$ret, [@cols[0,1]];
    }
    close $fh;
    return $ret;
}
# Read a whole CSV file through the file-level Text::CSV object ($csv).
#
# Parameters:
#   $file     - path of the file to read
#   $noheader - when true, the first row is left in the data; when false,
#               the first row is removed and returned as the header
# Returns:
#   ($rows, $header) - $rows is what getline_all produced (minus the
#   header row when one was split off); $header is that first row, or
#   undef when $noheader is true.
# Dies when the file cannot be opened.
sub get_data {
    my ($file, $noheader) = @_;

    open(my $in, '<', $file)
        or die "Can not open [$file] $!";
    my $records = $csv->getline_all($in);
    # Parser stopped before end of file: let Text::CSV report why
    $csv->error_diag() unless $csv->eof;
    close $in;

    # Caller asked for the first row to stay in the data
    return ($records, undef) if $noheader;

    my $first_row = shift @$records;
    return ($records, $first_row);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment