Skip to content

Instantly share code, notes, and snippets.

@gfldex
Created November 18, 2015 20:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gfldex/e8469ad25ee559e7c8ca to your computer and use it in GitHub Desktop.
Save gfldex/e8469ad25ee559e7c8ca to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl6
# a simple script to take a dictionary w/ one word per line, e.g.
# /usr/share/dict/words, and count trigrams of letters in order to
# build a model to generate plausible nonsense words
# pretty much all the slowness happens in this function
# admittedly it gets called thousands of times
use nqp;
sub update_dict(\dict, Str $w) {
my $tagged := '$$' ~ $w ~ '%';
my str $tagged_native = nqp::unbox_s($tagged);
my int $max = nqp::chars($tagged_native);
loop (my int $idx = 0; $idx < $max - 2; $idx = $idx + 1) {
dict{nqp::substr($tagged_native,$idx,3)}++;
}
}
my Int %countdict;
for lines() -> $w {
update_dict(%countdict, $w);
}
say "processed";
my @c = unique(gather {
for %countdict.keys -> $k {
take $_ for $k.split('');
}
});
sub getnext(%dict, @c, $cur) {
my $firsttwo = $cur.substr(1,2);
my $m = 0;
my $mv = '%';
for @c -> $v {
my $score = rand * (%dict{$firsttwo ~ $v} // 0);
if $score > $m {
$m = $score;
$mv = $v;
}
}
return $mv;
}
sub getword(%dict, @c) {
my $str = '$$' ~ @c.pick;
until $str.substr($str.chars-1, 1) === '%' {
my $cur = $str.flip.substr(0,3).flip;
$str = $str ~ getnext(%countdict, @c, $cur);
}
return $str.match(/\w+/);
}
say getword(%countdict, @c) for ^7;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment