Last active
January 24, 2016 17:38
-
-
Save nicdoye/7ca8b37c4da380e7e18c to your computer and use it in GitHub Desktop.
Perl (Moose) solution to the "find the 3 most frequent words in a file" problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use feature qw(say switch); | |
###################################################################### | |
package HashOfHash;
use Moose;

# A two-level hash used as a set of sets:
#     { overall_key => { sub_key => $_default_value, ... }, ... }
# Only the presence of a sub-key matters, so every value is the same
# placeholder.

# Placeholder stored against every sub-key; deliberately left undefined.
my $_default_value;

# The two-level hash itself. It is an attribute, not a file-scoped variable,
# so separate objects never see each other's keys. init_arg => undef keeps it
# out of the constructor.
has '_buckets' => (
    is       => 'ro',
    isa      => 'HashRef',
    init_arg => undef,
    default  => sub { {} },
);

# Record $sub_key under $overall_key. The inner hash is created on first use.
sub addTo {
    my ( $self, $overall_key, $sub_key ) = @_;
    $self->_buckets->{$overall_key}{$sub_key} = $_default_value;
    return;
}

# Remove $sub_key from under $overall_key.
# A missing key is reported on STDERR rather than treated as fatal.
sub removeFrom {
    my ( $self, $overall_key, $sub_key ) = @_;
    my $buckets = $self->_buckets;

    unless ( exists $buckets->{$overall_key} ) {
        # TwoWayHash::addTo reports a previous count of 0 for a string it has
        # not seen before, so a missing 0 bucket is expected and stays quiet.
        say STDERR "Key $overall_key not found in object"
            unless $overall_key == 0;
        return;
    }

    my $bucket = $buckets->{$overall_key};
    unless ( exists $bucket->{$sub_key} ) {
        say STDERR "Key $sub_key not found in value for object's $overall_key";
        return;
    }

    delete $bucket->{$sub_key};

    # Drop a bucket once it is empty so overall_keys() only reports keys that
    # still have members.
    delete $buckets->{$overall_key} unless %{$bucket};
    return;
}

# Return the list of top-level keys (in no particular order).
sub overall_keys {
    my $self = shift;
    return keys %{ $self->_buckets };
}

# Return the list of sub-keys stored under $key, or an empty list when $key
# is not present.
sub keys_for_overall_key {
    my ( $self, $key ) = @_;
    my $bucket = $self->_buckets->{$key};

    # keys() needs an explicit dereference: calling it on a hash reference
    # is a compile error on Perl 5.24 and later.
    return $bucket ? keys %{$bucket} : ();
}

no Moose;
######################################################################
package CountHash;
use Moose;
extends 'HashOfHash';

# A HashOfHash whose top-level keys are occurrence counts and whose sub-keys
# are the strings that currently have that count.

# Move $str from the bucket for $count to the bucket for $count + 1.
sub increment {
    my ( $self, $count, $str ) = @_;
    $self->removeFrom( $count, $str );
    $self->addTo( $count + 1, $str );
    return;
}

# Return an array reference holding the strings with the highest counts.
# Buckets are taken whole, highest count first, until at least $n strings
# have been collected, so a tie can yield more than $n entries. Strings that
# share a count are sorted so the output is the same on every run.
# A missing or non-positive $n yields an empty list.
sub top_n {
    my ( $self, $n ) = @_;
    my @answer;
    return \@answer unless defined $n && $n > 0;

    # would be better to pass comparator to HashOfHash
    for my $key ( sort { $b <=> $a } $self->overall_keys() ) {
        push @answer, sort { $a cmp $b } $self->keys_for_overall_key($key);
        last if @answer >= $n;
    }
    return \@answer;
}

no Moose;
######################################################################
package TwoWayHash;
use Moose;

# Keeps two views of the same tally so both lookups are direct:
# string => count, and count => strings.

# Hash of ( word => number-of-occurrences ).
has '_word_counts' => (
    is       => 'ro',
    isa      => 'HashRef',
    init_arg => undef,
    default  => sub { {} },
);

# Hash of ( number-of-occurrences => hash of ( words, nil ) ).
# Could refactor this out.
has '_count_index' => (
    is       => 'ro',
    isa      => 'CountHash',
    init_arg => undef,
    default  => sub { CountHash->new() },
);

# Count one more occurrence of $str and move it up one bucket in the index.
sub addTo {
    my ( $self, $str ) = @_;
    my $word_counts = $self->_word_counts;

    # A string seen for the first time has a previous count of 0.
    my $previous = $word_counts->{$str} // 0;
    $word_counts->{$str} = $previous + 1;

    $self->_count_index->increment( $previous, $str );
    return;
}

# Return an array reference of the most frequent strings; see
# CountHash::top_n for how ties are handled.
sub top_n {
    my ( $self, $n ) = @_;
    return $self->_count_index->top_n($n);
}

no Moose;
###################################################################### | |
package FileCounter;
use Moose;
use FileHandle;

# Counts the whitespace-separated words of one file.

# Path of the file whose words are counted.
has 'filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => ''
);

# Tally of the words read so far; each FileCounter builds its own TwoWayHash.
has 'two_way_hash' => (
    is      => 'rw',
    isa     => 'TwoWayHash',
    default => sub { TwoWayHash->new() }
);

# Read the file named by `filename` and pass every whitespace-separated word
# to the tally. If the file cannot be opened, a message is printed to STDERR
# and nothing is counted.
sub counter {
    my ($self) = @_;

    my $fh = FileHandle->new( $self->filename, 'r' );
    unless ( defined $fh ) {
        # A method call is not interpolated inside a string, so the file
        # name has to be concatenated.
        say STDERR "Couldn't open file: " . $self->filename;
        return;
    }

    while ( my $line = <$fh> ) {
        # split ' ' skips leading whitespace, so no empty fields come back
        # and a word such as "0" is counted like any other.
        for my $word ( split ' ', $line ) {
            $self->two_way_hash->addTo($word);
        }
    }

    $fh->close;
    return;
}

# Return an array reference of the most frequent words, as computed by the
# tally's own top_n.
sub top_n {
    my ( $self, $n ) = @_;
    return $self->two_way_hash->top_n($n);
}

no Moose;
###################################################################### | |
package main;

# At least one file name is required on the command line. The check tests
# the argument list itself instead of comparing $#ARGV as a string.
die("I need a filename, idiot\n") unless @ARGV;

# For each file, print its three most frequent words, one per line
# (more than three lines when counts are tied).
for my $file (@ARGV) {
    my $filecounter = FileCounter->new( filename => $file );
    $filecounter->counter();
    say for @{ $filecounter->top_n(3) };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment