-
-
Save notbenh/1216168 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use LWP::Simple; | |
#------------------------------------------------------------------------------- | |
# calculate how many times each speaker spoke | |
#------------------------------------------------------------------------------- | |
my %speakers; | |
do{ my ($_) = m/\d{4}-\d\d-\d\d\.(?:([^.]+)\.)?.*/; # named capture? | |
s/^jeff$/lavallee/ if $_; | |
s/^net$/ingy/i if $_; | |
$speakers{lc($_)}++ | |
for split /-/, $_ || 'group'; # handle 'chromatic-schwern' and the 'no speaker' entries | |
} for get(q{http://pdxpm.enobacon.com/}) =~ m/href="pdx-pm.(.*?).mp3"/g; | |
#------------------------------------------------------------------------------- | |
# Group speakers by the number of times they spoke | |
#------------------------------------------------------------------------------- | |
my @report; | |
do { push @{$report[$speakers{$_}]}, $_; | |
} for keys %speakers; | |
#------------------------------------------------------------------------------- | |
# Nice human sounding output | |
#------------------------------------------------------------------------------- | |
for(my $i=1; $i < scalar(@report); $i++) { | |
next unless $report[$i]; | |
my $speakers = join scalar @{$report[$i]} > 2 ? ', ' : ' ' | |
, sort @{$report[$i]}; | |
$speakers =~ s/(.*) (\w+)$/$1 and $2/; | |
printf qq{%s spoke %s.\n} | |
, $speakers | |
, $i == 1 ? 'once' | |
: $i == 2 ? 'twice' | |
: qq{$i times} | |
; | |
} | |
__END__ | |
audrey_tang, austin, cooley, jaap, jay, kevin, panel, pearcey, riddle, roth, shirley, and smoody spoke once. | |
buels, hengst, ingy, and leto spoke twice. | |
deckelmann, granum, and lavallee spoke 3 times. | |
chromatic and wilhelm spoke 5 times. | |
group and schwern spoke 6 times. |
In reference to http://mail.pm.org/pipermail/pdx-pm-list/2011-September/006233.html
I'm not sure about a better way, as it's not worth compromising readability on such a small data set. Your loops and data structures probably won't be the slowest part of your code, network I/O and regexes might be. Time complexity for parsing and printing data can't improve beyond O(n), which is where you're at now (unless you're intentionally trying to break the regex parser with crazy source data).
You could avoid hashes by using foldl (List::Util's reduce sub), but hashes are used elsewhere under the hood (in LWP::Simple and maybe inside the regex matcher). You could remove the perception of looping a bit by moving your first two for loops into a sub that takes a list of names and returns a count of each name.
sub count {
my %by_name; my @counts;
$by_name{$_}++ for @_;
push(@{$counts[$by_name{$_}]}, $_) for sort keys %by_name;
@counts;
}
my %aliases = (jeff => 'lavallee', net => 'ingy');
my $data = get(q{http://pdxpm.enobacon.com/});
my @speakers =
map { $aliases{$_} || $_ } # handle speakers who are known by multiple names
map { split /-/, lc($_||'group') } # handle 'chromatic-schwern' and the 'no speaker' entries
($data =~ m/href="pdx-pm.[\d-]+(?:\.([^.]+))?\.[^.]+\.mp3/g);
my @counts = count @speakers;
for(my $i=1; $i < scalar @counts; $i++) {
my @names = @{$counts[$i]||[]} or next;
printf qq{%s spoke %s.\n}
, join(' and ', join(', ', @names[0..$#names-1]), $names[-1])
, $i == 1 ? 'once'
: $i == 2 ? 'twice'
: qq{$i times}
;
}
A version of count using foldl:
use List::Util qw(reduce);
sub count {
@{
shift reduce { my ($acc, $name, $count) = @$a; $a = $a; # foldl
$name eq $b
? push($acc->[$count+1]||=[], pop($acc->[$count])) && [$acc, $name, $count+1]
: push($acc->[1]||=[], $b) && [$acc, $b, 1]
} [[[]], '', 0], sort @_;
}
}
+1