gfldex/crucialrhyme-1.p6

## crucialrhyme-1.p6
#!/usr/bin/env perl6
# a simple script to take a dictionary w/ one word per line, e.g.
# /usr/share/dict/words, and count trigrams of letters in order to
# build a model to generate plausible nonsense words

# pretty much all the slowness happens in this function
# admittedly it gets called thousands of times

use nqp;

sub update_dict(\dict, Str $w) {
        my $tagged := '$$' ~ $w ~ '%';
        my str $tagged_native = nqp::unbox_s($tagged);
        my int $max = nqp::chars($tagged_native);

        loop (my int $idx = 0; $idx < $max - 2; $idx = $idx + 1) {
                dict{nqp::substr($tagged_native,$idx,3)}++;
        }
}

my Int %countdict;
for lines() -> $w {
        update_dict(%countdict, $w);
}
say "processed";
my @c = unique(gather {
        for %countdict.keys -> $k {
                take $_ for $k.split('');
        }
});

sub getnext(%dict, @c, $cur) {
        my $firsttwo = $cur.substr(1,2);
        my $m = 0;
        my $mv = '%';
        for @c -> $v {
                my $score = rand * (%dict{$firsttwo ~ $v} // 0);
                if $score > $m {
                        $m = $score;
                        $mv = $v;
                }
        }
        return $mv;
}


sub getword(%dict, @c) {
        my $str = '$$' ~ @c.pick;
        until $str.substr($str.chars-1, 1) === '%' {
                my $cur = $str.flip.substr(0,3).flip;
                $str = $str ~ getnext(%countdict, @c, $cur);
        }
        return $str.match(/\w+/);
}

say getword(%countdict, @c) for ^7;
	#!/usr/bin/env perl6
	# a simple script to take a dictionary w/ one word per line, e.g.
	# /usr/share/dict/words, and count trigrams of letters in order to
	# build a model to generate plausible nonsense words

	# pretty much all the slowness happens in this function
	# admittedly it gets called thousands of times

	use nqp;

	sub update_dict(\dict, Str $w) {
	my $tagged := '$$' ~ $w ~ '%';
	my str $tagged_native = nqp::unbox_s($tagged);
	my int $max = nqp::chars($tagged_native);

	loop (my int $idx = 0; $idx < $max - 2; $idx = $idx + 1) {
	dict{nqp::substr($tagged_native,$idx,3)}++;
	}
	}

	my Int %countdict;
	for lines() -> $w {
	update_dict(%countdict, $w);
	}
	say "processed";
	my @c = unique(gather {
	for %countdict.keys -> $k {
	take $_ for $k.split('');
	}
	});

	sub getnext(%dict, @c, $cur) {
	my $firsttwo = $cur.substr(1,2);
	my $m = 0;
	my $mv = '%';
	for @c -> $v {
	my $score = rand * (%dict{$firsttwo ~ $v} // 0);
	if $score > $m {
	$m = $score;
	$mv = $v;
	}
	}
	return $mv;
	}


	sub getword(%dict, @c) {
	my $str = '$$' ~ @c.pick;
	until $str.substr($str.chars-1, 1) === '%' {
	my $cur = $str.flip.substr(0,3).flip;
	$str = $str ~ getnext(%countdict, @c, $cur);
	}
	return $str.match(/\w+/);
	}

	say getword(%countdict, @c) for ^7;