Skip to content

Instantly share code, notes, and snippets.

@zed9h
Created July 17, 2009 04:55
Show Gist options
  • Save zed9h/148894 to your computer and use it in GitHub Desktop.
Save zed9h/148894 to your computer and use it in GitHub Desktop.
Lewis Carroll's doublets automated solver
#!/usr/bin/perl
use strict;
use warnings;
my $VERSION=2;
# TODO append results to file (doublet-w1-w2.txt:$0 @ARGV\n...)
printf "Lewis Carroll's Doublet Automated v%d\n", $VERSION;
print "$0 @ARGV\n";
use Getopt::Std;
my %weight = (
other=>1,
verb =>5,
plural=>2,
# gender=>3, # TODO avoid gender switch, mark someway on dictionary load (_g => other_gender)
# stats=>1, # TODO build statistics from sample.txt (machado de assis texts)
);
getopt('VOPGS', \%weight);
my ($start, $end) = @ARGV;
die "word length does not match" unless length $start == length $end;
# DICTIONARY
my %word = ();
my $accent = do "accent.data";
# { 'á'=>'a','ä'=>'a','à'=>'a','â'=>'a','ã'=>'a', 'å'=>'a',
sub norm($)
{
local $_ = shift;
s{[\x7f-\xff]}{exists $accent->{$&} ? $accent->{$&} : $&}ge;
lc $_
}
if($weight{other}) {
open WORD, "wordlist.txt" or die "$!";
# abdicaç:ão,ões
# abdome:,s
# Uruaçu
# únic:a,as,o,os
while(<WORD>) {
chomp;
next if /^[A-Z]/;
@_ = split /:/;
if(@_ == 2) {
foreach (split /,/, $_[1]) {
$_ = $_[0].$_;
my $__ = norm $_;
if(/s$/) {
next if not $weight{plural};
$word{$__}->{_p}++;
}
$word{$__}->{$_}++;
}
} else {
$word{norm($_)}->{$_}++;
}
}
close WORD;
}
if($weight{verb}) {
open VERB, "verblist.txt" or die "$!";
# confluía:confluías:confluía:confluíamos:confluíeis:confluíam
# confluir:confluindo:confluído
# ser:sendo:sido
# aceito
while(<VERB>) {
chomp;
foreach (split /:/) {
$_ = norm($_);
$word{$_}->{_v}++;
$word{$_}->{$_}++;
}
}
close VERB;
}
printf "Dictionary loaded with %d words.\n", scalar keys %word;
foreach ($start, $end) {
die "$_ not on dictionary" unless exists $word{norm($_)};
}
# GRAPH
use Graph;
my $g = Graph::Undirected->new;
my $n = 0;
foreach my $w1 (sort keys %word) {
next unless length $start == length $w1;
for(my $i=0; $i < length $w1; $i++) {
my @w2 = (substr($w1, 0, $i), substr($w1,$i,1), substr($w1, $i+1));
foreach my $c ('a'..'z') {
next if $c eq $w2[1];
my $w2 = $w2[0].$c.$w2[2];
next unless exists $word{$w2};
my $h =
exists $word{$w1}->{_v} || exists $word{$w2}->{_v} ? $weight{verb} :
exists $word{$w1}->{_p} || exists $word{$w2}->{_p} ? $weight{plural} :
# !$w2[2] && (
# ($c eq 'a' && $w2[1] eq 'o') ||
# ($c eq 'o' && $w2[1] eq 'a') ) ? $weight{gender} :
$weight{other};
$g->add_weighted_edge($w1,$w2,$h);
}
}
printf "\r%d words linked", ++$n;
}
print ".\n";
my @path = $g->SP_Dijkstra(norm($start), norm($end));
print "Normal solution: ", join(",", @path), "\n";
print "Dictionary Solutions:\n";
foreach my $w (@path) {
print join(" | ", sort grep {!/^_/} keys %{$word{$w}}), "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment