Skip to content

Instantly share code, notes, and snippets.

@pts
Created December 2, 2013 16:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pts/7752110 to your computer and use it in GitHub Desktop.
Save pts/7752110 to your computer and use it in GitHub Desktop.
#!perl -w
# let's learn pod!
=head1 NAME
ladder - Finds a word letter between two words
=head1 SYNOPSIS
B<ladder> I<word1> I<word2> [I<dictfile>]
=head1 DESCRIPTION
This is a solution for Expert Perl Quiz of the Week 22
(L<http://perl.plover.com/qotw/e/022>).
It builds a word ladder between two words given as arguments.
The program needs a I<sorted> plain text dictionary file to build the word ladder.
The name of this dictionary can be given as an optional third argument.
There is a default dictionary, of which the path can be set in the source.
=head1 SWITCHES
=over
=item B<-l>
Do not use capitalized words from the dictionary.
If this switch is not used, case differences are ignored.
=item B<-u>
Do not do the first-letter optimization.
The optimization should make the program run faster in most cases,
but when it makes it slower, you can disable it with this switch.
The optimization relies on that the dictionary is sorted (case-insensitively),
so when you use an unsorted dictionary, you should use this switch.
=item B<-w>
Spit out various debugging information, mostly useful for developping.
=back
=head1 IMPLEMENTATION
This program uses a very simple algorithm: a two-way breadth-first search.
Thus, we first read the dictionary.
Then we take the graph of words with the edges being those pair of words that
differ in one letter only.
Then we do a breadth-first search from both endpoints simultanously.
If we find a word that is reachable from both endpoints, we return success.
If the search from either endpoint ends, we return a failure.
I first wanted to implement a more complicated (and hopefully faster)
search algorithm, but when I implemented this one,
it turned out that the search itself is much faster than reading the dictionary,
so I rather optimized I/O.
Thus, the program now tries to read only those parts of the dictionary that start
with the same letter as one of the given words, and try to find a word ladder
among those.
Furthermore, there is yet one more level of optimization concerning the first
two letters of the words.
=head1 EXAMPLE
You run:
perl ladder find lose
You get:
find
fine
fise
Lise
lose
=head1 AUTHOR
L<ambrus@...>
=cut
# settings
our $default_dictname = "/home/ambrus/a/qotw/e22/words-b";
# 0. Declarations
use warnings; use strict;
use IO::Handle;
# 1. Parse argv
our($dictname, $noproper, $word1o, $word2o, $debug, $unoptimize, $unoptimize2);
{
use Getopt::Std;
getopts "luUw", \my %switch;
($noproper, $debug, $unoptimize, $unoptimize2) = @switch{qw(l w u U)};
2<=@ARGV && @ARGV<=3 or die "Usage: ladder -d dictfile word1 word2";
($word1o, $word2o, $dictname) = @ARGV;
defined($dictname) or $dictname = $default_dictname;
}
# 2. Locale-dependent functions
{
use locale;
my $re = qr/\A[[:alpha:]]+\z/;
$noproper and $re = qr/\A[[:lower:]]+\z/;
sub checkword {
my($w) = @_;
$w=~$re ? 1 : 0;
}
# You might want to treat some letters the same (especially in French).
# In that case, change the following function accordingly. The function must
# not change the lenght of the word or else the program will break.
sub normalize {
lc $_[0];
}
}
# 3. Do some simple checking on the words
our($length, $word1, $word2);
sub addword;
do {
checkword($word1o) && checkword($word2o) or
die "one of the given words contain illegal characters";
length($word1o)==length($word2o) or
die "it is impossible to build a word ladder, as the two words are of different length\n";
$length = length($word1o);
$word1 = normalize($word1o);
addword $word1, $word1o;
$word2 = normalize($word2o);
addword $word2, $word2o;
$word1 eq $word2 and do {
print $word1, "\n";
exit 0;
};
};
# 4. Read and process the dictionary
# this should be really simple, but the code somehow became a mess.
our %word; # this hash contains the original forms of a word
our $dict; # a filehandle
sub addword;
do {
open $dict, "<", $dictname or die qq{cannot open dictionary file "$dictname": $!};
$debug and warn qq{dictionary file "$dictname" open};
};
sub addword { # $w should be eq normalize($o)
my($w, $o) = @_;
defined($word{$w}) or
$word{$w} = $o;
}
sub readdict {
no locale;
my($until) = @_;
my($w, $l);
$! = 0;
while (defined($l = <$dict>)) {
chomp $l;
length($l)==$length or next;
checkword $l or next;
$w = normalize($l);
$w lt $until or last;
addword $w, $l;
}
error $dict and die qq{error readig dictionary file "$dictname": $!}; # see "http://www.perlmonks.com/?node_id=386996"
$debug and warn "there are ", (0+keys(%word)), " words in the hash now";
}
# 5. Optimization: not to read the whole word list unless neccessary
our $phase;
sub seekpart; sub makepatts; sub dosearch; sub seekpart;
sub readpart {
my($prefix) = @_;
defined($prefix) or $prefix = "";
seekpart $prefix;
readdict $prefix . "\xff\xff\xff"; # yes, this is a kludge, but I hope no word will have a triple \xff in it.
}
sub seekpart {
my($prefix) = @_;
my($l, $w);
my($a, $b) = (0, -s $dict);
defined($b) or die qq{error fstating the dictionary file "$dictname": $!};
until ($b-$a<=2048) {
my $c = int(($a + $b)/2);
seek $dict, $c, 0 or
die qq{error seeking the dictionary file "$dictname" to $c during bsearch: $!};
$! = 0;
$l = <$dict>; # get aligned to a newline
chomp($l = <$dict>) until
checkword $l or !defined($l);
error $dict and
die qq{error reading the dictionary file "$dictname" during bsearch: $!};
$w = defined($l) ? normalize($l) : ("\xff" x 6);
($w gt $prefix) ? $b : $a = $c;
}
$debug and warn qq{bsearch done ("$prefix" $a)};
seek $dict, $a, 0 or
die qq{error seeking the dictionary file "dictname" to $a, during bsearch, final: $!};
$a>0 and do { # IMPORTANT!
<$dict>;
error $dict and
die qq{error reading the dictionary file "$dictname" during bsearch, final align: $!}
};
}
our %readpart;
sub condreadpart {
my($l) = @_;
unless ($readpart{$l}) {
$debug and warn qq{processing prefix "$l" from the dictionary};
readpart $l;
$readpart{$l} = 1;
}
}
ALL: {
if (!$unoptimize) {
if (!$unoptimize2) {
$phase = 2;
condreadpart substr($word1, 0, 2);
condreadpart substr($word2, 0, 2);
condreadpart substr($word1, 0, 1) . substr($word2, 1, 1);
condreadpart substr($word2, 0, 1) . substr($word1, 1, 1);
makepatts;
dosearch;
}
$phase = 1;
condreadpart substr($word1, 0, 1);
condreadpart substr($word2, 0, 1);
makepatts;
dosearch;
}
$phase = 0;
$debug and warn "processing the whole dictionary";
readpart "";
makepatts;
close $dict or die qq{error closing dictionary file "$dictname": $!};
dosearch;
}
# 6. Create helper table so that we can find adjacent words faster
our %patt;
sub makepatts {
%patt = ();
for my $w (keys %word) {
for my $n (0..length($w)-1) {
my $p = substr($w, 0, $n) . "." . substr($w, $n+1);
push @{$patt{$p}}, $w;
}
}
$debug and warn "there are ", (0+keys(%word)), " patterns in the hash now";
}
# 7. Do the actual search
our @qu; # the queue needed for the bsearch
our %seen; # what nodes were seen in the bsearch and from where
sub visit; sub visit1; sub success; sub neighbours; sub failure;
sub cleansearch {
%seen = ();
}
sub dosearch {
PHASE: {
cleansearch;
@qu = (
[$word1, 1, undef],
1,
[$word2, 2, undef],
2,
);
while (my $e = shift @qu) {
if (ref $e) {
visit @$e;
} else {
push @qu, $e;
!ref($qu[0]) and failure ($e);
$debug and warn "$e\n";
}
}
die "internal error: the markers have somehow disappeared";
}
}
sub visit {
my($w, $dir, $from) = @_;
my $s = $seen{$w}[0];
$s && $s!=$dir and
success $w, $dir, $from;
$s and return;
$seen{$w} = [$dir, $from];
neighbours $w, sub
{ push @qu, [$_[0], $dir, $w]; };
}
# 8. Find the neighbours of a given word
sub neighbours {
my($w, $cb) = @_;
for my $n (0..length($w)-1) {
my $p = substr($w, 0, $n) . "." . substr($w, $n+1);
@{$patt{$p}}<=1 and next;
for my $x (@{$patt{$p}}) {
&$cb($x) unless $x eq $w;
}
}
}
# 9. Output the result, either word ladder or failure
sub failure {
$debug and warn "phase $phase failed, word$_[0] was more isolated,";
$phase or do {
die "no word ladder found\n";
};
no warnings "exiting";
next PHASE; # :-)
}
sub printladder1; sub printladder2;
sub success {
$debug and warn "about to print solution";
my($pn, $dir, $po) = @_;
my($p1, $p2) = $dir==1 ? ($po, $pn) : ($pn, $po);
printladder1 $p1;
printladder2 $p2;
no warnings "exiting";
last ALL;
}
sub printladder1 {
my $r = $word{$_[0]};
my $n = $seen{$_[0]}[1];
$n and printladder1 $n;
print $r, "\n";
}
sub printladder2 {
my $r = $word{$_[0]};
my $n = $seen{$_[0]}[1];
print $r, "\n";
$n and printladder2 $n;
}
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment