zed9h/doublets.pl

## doublets.pl
#!/usr/bin/perl
use strict;
use warnings;
my $VERSION=2;

# TODO append results to file (doublet-w1-w2.txt:$0 @ARGV\n...)

printf "Lewis Carroll's Doublet Automated v%d\n", $VERSION;
print "$0 @ARGV\n";

use Getopt::Std;
my %weight = (
	other=>1,
	verb =>5,
	plural=>2,
#	gender=>3, # TODO avoid gender switch, mark someway on dictionary load (_g => other_gender)
#	stats=>1,  # TODO build statistics from sample.txt (machado de assis texts)
);
getopt('VOPGS', \%weight);
my ($start, $end) = @ARGV;

die "word length does not match" unless length $start == length $end;

# DICTIONARY

my %word = ();

my $accent = do "accent.data";
# { 'á'=>'a','ä'=>'a','à'=>'a','â'=>'a','ã'=>'a', 'å'=>'a',
sub norm($)
{
	local $_ = shift;
	s{[\x7f-\xff]}{exists $accent->{$&} ? $accent->{$&} : $&}ge;
	lc $_
}

if($weight{other}) {
	open WORD, "wordlist.txt" or die "$!";
	# abdicaç:ão,ões
	# abdome:,s
	# Uruaçu
	# únic:a,as,o,os
	while(<WORD>) {
		chomp;
		next if /^[A-Z]/;
		@_ = split /:/;
		if(@_ == 2) {
			foreach (split /,/, $_[1]) {
				$_ = $_[0].$_;
				my $__ = norm $_;
				if(/s$/) {
					next if not $weight{plural};
					$word{$__}->{_p}++;
				}
				$word{$__}->{$_}++;
			}
		} else {
			$word{norm($_)}->{$_}++;
		}
	}
	close WORD;
}

if($weight{verb}) {
	open VERB, "verblist.txt" or die "$!";
	# confluía:confluías:confluía:confluíamos:confluíeis:confluíam
	# confluir:confluindo:confluído
	# ser:sendo:sido
	# aceito
	while(<VERB>) {
		chomp;
		foreach (split /:/) {
			$_ = norm($_);
			$word{$_}->{_v}++;
			$word{$_}->{$_}++;
		}
	}
	close VERB;
}

printf "Dictionary loaded with %d words.\n", scalar keys %word;

foreach ($start, $end) {
	die "$_ not on dictionary" unless exists $word{norm($_)};
}

# GRAPH

use Graph;
my $g = Graph::Undirected->new;

my $n = 0;
foreach my $w1 (sort keys %word) {
	next unless length $start == length $w1;
	for(my $i=0; $i < length $w1; $i++) {
		my @w2 = (substr($w1, 0, $i), substr($w1,$i,1), substr($w1, $i+1));
		foreach my $c ('a'..'z') {
			next if $c eq $w2[1];
			my $w2 = $w2[0].$c.$w2[2];
			next unless exists $word{$w2};
			my $h =
				exists $word{$w1}->{_v} || exists $word{$w2}->{_v} ? $weight{verb} :
				exists $word{$w1}->{_p} || exists $word{$w2}->{_p} ? $weight{plural} :
#				!$w2[2] && (
#					($c eq 'a' && $w2[1] eq 'o') ||
#					($c eq 'o' && $w2[1] eq 'a') ) ? $weight{gender} :
				$weight{other};
			$g->add_weighted_edge($w1,$w2,$h);
		}
	}
	printf "\r%d words linked", ++$n;
}
print ".\n";

my @path = $g->SP_Dijkstra(norm($start), norm($end));
print "Normal solution: ", join(",", @path), "\n";
print "Dictionary Solutions:\n";

foreach my $w (@path) {
	print join(" | ", sort grep {!/^_/} keys %{$word{$w}}), "\n";
}
	#!/usr/bin/perl
	use strict;
	use warnings;
	my $VERSION=2;

	# TODO append results to file (doublet-w1-w2.txt:$0 @ARGV\n...)

	printf "Lewis Carroll's Doublet Automated v%d\n", $VERSION;
	print "$0 @ARGV\n";

	use Getopt::Std;
	my %weight = (
	other=>1,
	verb =>5,
	plural=>2,
	# gender=>3, # TODO avoid gender switch, mark someway on dictionary load (_g => other_gender)
	# stats=>1, # TODO build statistics from sample.txt (machado de assis texts)
	);
	getopt('VOPGS', \%weight);
	my ($start, $end) = @ARGV;

	die "word length does not match" unless length $start == length $end;

	# DICTIONARY

	my %word = ();

	my $accent = do "accent.data";
	# { 'á'=>'a','ä'=>'a','à'=>'a','â'=>'a','ã'=>'a', 'å'=>'a',
	sub norm($)
	{
	local $_ = shift;
	s{[\x7f-\xff]}{exists $accent->{$&} ? $accent->{$&} : $&}ge;
	lc $_
	}

	if($weight{other}) {
	open WORD, "wordlist.txt" or die "$!";
	# abdicaç:ão,ões
	# abdome:,s
	# Uruaçu
	# únic:a,as,o,os
	while(<WORD>) {
	chomp;
	next if /^[A-Z]/;
	@_ = split /:/;
	if(@_ == 2) {
	foreach (split /,/, $_[1]) {
	$_ = $_[0].$_;
	my $__ = norm $_;
	if(/s$/) {
	next if not $weight{plural};
	$word{$__}->{_p}++;
	}
	$word{$__}->{$_}++;
	}
	} else {
	$word{norm($_)}->{$_}++;
	}
	}
	close WORD;
	}

	if($weight{verb}) {
	open VERB, "verblist.txt" or die "$!";
	# confluía:confluías:confluía:confluíamos:confluíeis:confluíam
	# confluir:confluindo:confluído
	# ser:sendo:sido
	# aceito
	while(<VERB>) {
	chomp;
	foreach (split /:/) {
	$_ = norm($_);
	$word{$_}->{_v}++;
	$word{$_}->{$_}++;
	}
	}
	close VERB;
	}

	printf "Dictionary loaded with %d words.\n", scalar keys %word;

	foreach ($start, $end) {
	die "$_ not on dictionary" unless exists $word{norm($_)};
	}

	# GRAPH

	use Graph;
	my $g = Graph::Undirected->new;

	my $n = 0;
	foreach my $w1 (sort keys %word) {
	next unless length $start == length $w1;
	for(my $i=0; $i < length $w1; $i++) {
	my @w2 = (substr($w1, 0, $i), substr($w1,$i,1), substr($w1, $i+1));
	foreach my $c ('a'..'z') {
	next if $c eq $w2[1];
	my $w2 = $w2[0].$c.$w2[2];
	next unless exists $word{$w2};
	my $h =
	exists $word{$w1}->{_v} \|\| exists $word{$w2}->{_v} ? $weight{verb} :
	exists $word{$w1}->{_p} \|\| exists $word{$w2}->{_p} ? $weight{plural} :
	# !$w2[2] && (
	# ($c eq 'a' && $w2[1] eq 'o') \|\|
	# ($c eq 'o' && $w2[1] eq 'a') ) ? $weight{gender} :
	$weight{other};
	$g->add_weighted_edge($w1,$w2,$h);
	}
	}
	printf "\r%d words linked", ++$n;
	}
	print ".\n";

	my @path = $g->SP_Dijkstra(norm($start), norm($end));
	print "Normal solution: ", join(",", @path), "\n";
	print "Dictionary Solutions:\n";

	foreach my $w (@path) {
	print join(" \| ", sort grep {!/^_/} keys %{$word{$w}}), "\n";
	}