Skip to content

Instantly share code, notes, and snippets.

@nicdoye
Last active January 24, 2016 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicdoye/7ca8b37c4da380e7e18c to your computer and use it in GitHub Desktop.
Save nicdoye/7ca8b37c4da380e7e18c to your computer and use it in GitHub Desktop.
Perl (Moose) solution to the "find the 3 most frequent words in a file" problem
#!/usr/bin/env perl
use strict;
use warnings;
use feature qw(say switch);
######################################################################
package HashOfHash;
use Moose;

# Placeholder stored under every sub-key. The inner hashes are used as sets,
# so only the presence of a key matters and the value stays undef.
my $_default_value;

# Per-instance storage: { overall_key => { sub_key => undef, ... }, ... }.
# This used to be a single file-level hash shared by every instance, which
# made counts leak from one object into the next. init_arg => undef keeps it
# out of the constructor so new() accepts exactly what it did before.
has '_buckets' => (
    is       => 'ro',
    isa      => 'HashRef',
    default  => sub { {} },
    init_arg => undef,
);

# Record $sub_key under $overall_key. The inner hash is created on first use
# by autovivification, so no existence check is needed.
sub addTo {
    my ( $self, $overall_key, $sub_key ) = @_;
    $self->_buckets->{$overall_key}{$sub_key} = $_default_value;
    return;
}

# Remove $sub_key from under $overall_key. A missing key is reported on
# STDERR and otherwise ignored.
sub removeFrom {
    my ( $self, $overall_key, $sub_key ) = @_;
    my $buckets = $self->_buckets;

    unless ( exists $buckets->{$overall_key} ) {
        # Key 0 is exempt: CountHash::increment asks for removal from count 0
        # the first time it sees a string, and that is not an error.
        say( STDERR "Key $overall_key not found in object" ) unless ( $overall_key == 0 );
        return;
    }

    my $bucket = $buckets->{$overall_key};
    unless ( exists $bucket->{$sub_key} ) {
        say( STDERR "Key $sub_key not found in value for object's $overall_key" );
        return;
    }

    delete $bucket->{$sub_key};

    # Drop a bucket once it is empty so overall_keys() only reports keys that
    # still hold something and the structure does not grow without bound.
    delete $buckets->{$overall_key} unless %{$bucket};
    return;
}

# Return the list of overall keys currently held (in no particular order).
sub overall_keys {
    my $self = shift;
    return keys %{ $self->_buckets };
}

# Return the list of sub-keys stored under $key, or an empty list when $key
# is unknown. The explicit %{ ... } dereference is required: calling keys()
# directly on a hash reference is a compile error from Perl 5.24 onwards.
sub keys_for_overall_key {
    my ( $self, $key ) = @_;
    my $bucket = $self->_buckets->{$key};
    return $bucket ? keys %{$bucket} : ();
}
no Moose;
######################################################################
package CountHash;
use Moose;
extends 'HashOfHash';

# A HashOfHash whose overall keys are occurrence counts and whose sub-keys
# are the strings seen that many times.

# Move $str from the bucket for $count to the bucket for $count + 1.
# Pass 0 as $count for a string that has not been seen before.
sub increment {
    my ( $self, $count, $str ) = @_;
    $self->removeFrom( $count, $str );
    $self->addTo( $count + 1, $str );
    return;
}

# Return an array reference holding the strings with the highest counts,
# most frequent first. Whole buckets are taken until at least $n strings
# have been collected, so a tie can yield more than $n entries. Strings
# that share a count are sorted alphabetically to make the result
# reproducible (hash key order is otherwise random from run to run).
sub top_n {
    my ( $self, $n ) = @_;

    # would be better to pass comparator to HashOfHash
    my @sorted_overall_keys = sort { $b <=> $a } $self->overall_keys();

    my @answer;
    foreach my $key (@sorted_overall_keys) {
        push @answer, sort { $a cmp $b } $self->keys_for_overall_key($key);
        last if @answer >= $n;
    }
    return \@answer;
}
no Moose;
######################################################################
package TwoWayHash;
use Moose;

# Word counter that keeps two views of the same data in step:
# word => count, and count => set of words.

# ( word => number-of-occurrences ), one hash per instance.
has '_word_hash' => (
    is       => 'ro',
    isa      => 'HashRef',
    default  => sub { {} },
    init_arg => undef,
);

# ( number-of-occurrences => set of words ), one CountHash per instance.
# Could refactor this out.
has '_count_hash' => (
    is       => 'ro',
    isa      => 'CountHash',
    default  => sub { CountHash->new() },
    init_arg => undef,
);

# Count one more occurrence of $str in both views.
sub addTo {
    my ( $self, $str ) = @_;
    my $previous = $self->_word_hash->{$str} // 0;
    $self->_word_hash->{$str} = $previous + 1;
    $self->_count_hash->increment( $previous, $str );
    return;
}

# Return an array reference of the most frequent words; see CountHash::top_n.
sub top_n {
    my ( $self, $n ) = @_;
    return $self->_count_hash->top_n($n);
}
no Moose;
######################################################################
package FileCounter;
use Moose;
use FileHandle;

# Counts the whitespace-separated words of one file and reports the most
# frequent ones.

# Path of the file to read.
has 'filename' => (
    is      => 'rw',
    isa     => 'Str',
    default => ''
);

# Word store that the counts are accumulated into.
has 'two_way_hash' => (
    is      => 'rw',
    isa     => 'TwoWayHash',
    default => sub { TwoWayHash->new() }
);

# Read the file named by the filename attribute and add every word in it to
# two_way_hash. If the file cannot be opened a message is written to STDERR
# and nothing is counted.
sub counter {
    my ($self) = @_;

    my $fh = FileHandle->new( $self->filename, 'r' );
    unless ( defined $fh ) {
        # A method call does not interpolate inside a string, so the name is
        # concatenated; $! adds the reason the open failed.
        say( STDERR "Couldn't open file: " . $self->filename . " ($!)" );
        return;
    }

    while ( my $line = <$fh> ) {
        # split ' ' ignores leading whitespace and never yields empty fields,
        # so no truth test is needed - a truth test would discard the word "0".
        foreach my $word ( split ' ', $line ) {
            $self->two_way_hash->addTo($word);
        }
    }

    $fh->close;
    return;
}

# Return an array reference of the most frequent words counted so far;
# ties can make it longer than $n.
sub top_n {
    my ( $self, $n ) = @_;
    return $self->two_way_hash->top_n($n);
}
no Moose;
######################################################################
package main;

# At least one file name is required. An array in boolean context is true
# when it has elements; the previous "$#ARGV ge 0" compared the index as a
# string and was only right by accident of character ordering.
die("I need a filename, idiot\n") unless @ARGV;

# Print the three most frequent words of each file, one per line
# (more than three when there is a tie).
foreach my $file (@ARGV) {
    my $filecounter = FileCounter->new( filename => $file );
    $filecounter->counter();
    say for @{ $filecounter->top_n(3) };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment