Last active
November 11, 2018 22:20
-
-
Save DeeNewcum/dd1ba0e4ed2627fbbc54c339f8e13d3d to your computer and use it in GitHub Desktop.
solve \u\lennythebox's puzzle from reddit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# https://old.reddit.com/r/puzzles/comments/9vy5qm/desperately_need_help_on_number_15_this_is_from/ | |
use strict; | |
use warnings; | |
use utf8; | |
use Data::Dumper; | |
#use Devel::Comments; # uncomment this during development to enable the ### debugging statements | |
binmode STDOUT, ":utf8"; | |
my @EBMAB; | |
######## Read in words that might fit, from /usr/share/dict/words ######## | |
open FIN, '<', '/usr/share/dict/words' or die; | |
while (<FIN>) { | |
chomp; | |
# does this match any of the EBMAB? | |
my $match = EBMAB($_); | |
if (defined($match)) { | |
push @{$EBMAB[$match]}, $_; | |
} | |
} | |
#print Dumper \@EBMAB; exit; | |
# How many are in each bucket? (answer, multiplied: 234,263,232) | |
#my $EBMAB = 'EBMAB'; | |
#for (my $ctr=0; $ctr<@EBMAB; $ctr++) { | |
# printf "%s %d\n", substr($EBMAB, $ctr, 1), scalar(@{$EBMAB[$ctr]}); | |
#} exit; | |
######## Read in the corpus ####### | |
my $accum = ''; | |
foreach my $txtfilename (glob '*.txt') { | |
open FIN, '<:encoding(UTF-8)', $txtfilename or die $!; | |
while (<FIN>) { | |
s/[\n\r]*$//s; | |
if (!/\S/) { # blank line | |
if ($accum =~ /\S/) { | |
process_paragraph($accum); | |
} | |
$accum = ''; | |
} else { | |
$accum = $accum . ' ' . $_; | |
} | |
} | |
process_paragraph($accum); | |
} | |
my %adjacent; | |
sub process_paragraph { | |
local $_ = shift; | |
s/^\s*//; | |
s/\s*$//; | |
s/_//g; # seems to be used something like italics or underline. It's hard to | |
# intelligently remove these, so we'll unintelligently remove them. | |
#s/"//g; # we could handle quotes a lot better -- split them out, and then recursively | |
# call ourselves to process them as an entirely separate paragraph | |
#print "$_\n\n\n"; | |
my @words = split/[\s"”“,\.;\?!—()]+/, $_; | |
@words = map { | |
s/[\x{2018}\x{2019}]/'/g; | |
s/'s$//; # remove some contractions | |
$_ = lc($_); | |
$_ | |
} @words; | |
#print join " ", @words, "\n\n"; | |
for (my $ctr=0; $ctr<@words-1; $ctr++) { | |
$adjacent{ $words[$ctr] . ' ' . $words[$ctr+1] }++; | |
} | |
} | |
#print Dumper \%adjacent; | |
#my @keys = keys %adjacent; | |
#my @order = sort {$adjacent{$b} <=> $adjacent{$a}} @keys; | |
#foreach my $k (@order) { | |
# printf "%5d %s\n", $adjacent{$k}, $k; | |
#} exit: | |
######## Go through every combination of EBMAB, and calculate each one's score ####### | |
open FOUT, '>:encoding(UTF-8)', 'COMBOS' or die $!; | |
my $highscore = 0; | |
#my %combos; | |
foreach my $E (@{$EBMAB[0]}) { # this probably should be recursive instead of being hand-unrolled | |
foreach my $B1 (@{$EBMAB[1]}) { | |
my $score = ($adjacent{"$E $B1"} || 0); | |
foreach my $M (@{$EBMAB[2]}) { | |
my $score = ($adjacent{"$E $B1"} || 0) | |
+ ($adjacent{"$B1 $M"} || 0); | |
next if ($score < 2); | |
foreach my $A (@{$EBMAB[3]}) { | |
my $score = ($adjacent{"$E $B1"} || 0) | |
+ ($adjacent{"$B1 $M"} || 0) | |
+ ($adjacent{"$M $A"} || 0); | |
next if ($score < 6); | |
foreach my $B2 (@{$EBMAB[4]}) { | |
my $score = | |
($adjacent{"$E $B1"} || 0) | |
+ ($adjacent{"$B1 $M"} || 0) | |
+ ($adjacent{"$M $A"} || 0) | |
+ ($adjacent{"$A $B2"} || 0); | |
#$combos{"$E $B1 $M $A $B2"} = $score; | |
printf FOUT "%d %s\n", $score, "$E $B1 $M $A $B2"; | |
if (0 && $score >= $highscore) { | |
$highscore = $score; | |
printf "%d %s\n", $score, "$E $B1 $M $A $B2"; | |
} | |
if (1 && $score > 65) { | |
printf "%d %s\n", $score, "$E $B1 $M $A $B2"; | |
} | |
} | |
} | |
} | |
} | |
} | |
#print Dumper \%combos; exit; | |
# Does a given word match any of the EBMABs? | |
# Returns undef to indicate false, otherwise returns a number indicating which letter matched. | |
sub EBMAB { | |
my $word = shift; | |
if ($word =~ /^e\w{4}$/) { | |
return 0; | |
} | |
if ($word =~ /^b\w{2}$/) { | |
return 1; | |
} | |
if ($word =~ /^m\w{5}$/) { | |
return 2; | |
} | |
if ($word =~ /^a$/) { | |
return 3; | |
} | |
if ($word =~ /^b\w{3}$/) { | |
return 4; | |
} | |
return undef; # no match found | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment