Created
November 20, 2021 13:59
-
-
Save Gro-Tsen/d1bbfaae335ce164e86a4db3d70c1785 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/perl -w | |
# Find auto-descriptive sentences in English, such as the following: | |
# To do this, we iterate replacing a sentence by its description | |
# until, hopefully, we reach a fixed point. Generally this won't | |
# happen (we hit a loop instead). When this happens, we randomly | |
# change the initial blurb and try iterating again with this new blurb | |
# to break out of the loop. | |
use strict; | |
use warnings; | |
use utf8; | |
use open IN => ':utf8'; | |
binmode STDIN, ":utf8"; | |
binmode STDERR, ":utf8"; | |
binmode STDOUT, ":utf8"; | |
my @units = ( "zero", "one", "two", "three", "four", | |
"five", "six", "seven", "eight", "nine" ); | |
my @teens = ( "ten", "eleven", "twelve", "thirteen", "fourteen", | |
"fifteen", "sixteen", "seventeen", "eighteen", "nineteen" ); | |
my @tens = ( "", "ten", "twenty", "thirty", "forty", | |
"fifty", "sixty", "seventy", "eighty", "ninety" ); | |
sub name100 { | |
use integer; | |
my $n = shift; | |
die "Bad input to name100" unless $n >= 0 && $n < 100 && $n == ($n/10)*10+($n%10); | |
if ( $n < 10 ) { | |
return $units[$n]; | |
} elsif ( $n < 20 ) { | |
return $teens[$n%10]; | |
} else { | |
my $s = $tens[$n/10]; | |
$s .= "-".$units[$n%10]; | |
return $s; | |
} | |
} | |
sub name1000 { | |
use integer; | |
my $n = shift; | |
die "Bad input to name1000" unless $n >= 0 && $n < 1000 && $n == ($n/100)*100+($n%100); | |
if ( $n < 100 ) { | |
return name100($n); | |
} else { | |
my $s = $units[$n/100] . " hundred"; | |
if ( $n%100 == 0 ) { | |
} else { | |
$s .= " ".name100($n%100); | |
} | |
return $s; | |
} | |
} | |
sub countparts { | |
# Return a reference to a hash counting the number of different | |
# items in a sentence (mostly characters, but letters are | |
# normalized to lowercase, and accents are counted separately). | |
my $str = shift; | |
my %t; | |
for ( my $i=0 ; $i<length($str) ; $i++ ) { | |
my $c = substr($str,$i,1); | |
if ( $c ge "A" && $c le "Z" ) { | |
$c = chr(ord($c)-ord("A")+ord("a")); | |
} | |
$t{$c}++; | |
} | |
return \%t; | |
} | |
sub describe { | |
# Returns the full description of a sentence but without any | |
# starter blurb (“Cette phrase contient”). | |
my $str = shift; | |
my $tr = countparts($str); | |
my @t; | |
for ( my $i=0 ; $i<26 ; $i++ ) { | |
my $c = chr(ord("a")+$i); | |
push @t, (name1000($tr->{$c}) . " " . $c . ($tr->{$c}>1?"'s":"")) | |
if $tr->{$c}; | |
} | |
push @t, (name1000($tr->{" "}) . " " . ($tr->{" "}>1?"spaces":"space")) | |
if $tr->{" "}; | |
push @t, (name1000($tr->{"-"}) . " " . ($tr->{"-"}>1?"hyphens":"hyphen")) | |
if $tr->{"-"}; | |
push @t, (name1000($tr->{"'"}) . " " . ($tr->{"'"}>1?"apostrophes":"apostrophe")) | |
if $tr->{"'"}; | |
push @t, (name1000($tr->{","}) . " " . ($tr->{","}>1?"commas":"comma")) | |
if $tr->{","}; | |
push @t, (name1000($tr->{"."}) . " " . ($tr->{"."}>1?"periods":"period")) | |
if $tr->{"."}; | |
my $descr = ""; | |
for ( my $j=0 ; $j<scalar(@t) ; $j++ ) { | |
$descr .= ($j==0?"":$j==scalar(@t)-1?" and ":", ") . $t[$j]; | |
} | |
$descr .= "."; | |
return $descr; | |
} | |
my $bootstrap = ""; | |
MAINLOOP: while ( 1 ) { | |
# We randomly select the starter blurb, whether numbers will be | |
# written old style and whether to give letters their traditional | |
# gender. | |
my $starter; | |
my $quevoici = int(rand(2)); | |
$starter = $quevoici?"The":"This"; | |
$starter .= " ".(int(rand(2))?"nice":"cute") if int(rand(3)); | |
$starter .= " "."self-".(int(rand(2))?"descriptive":"referential") if int(rand(2)); | |
$starter .= " sentence"; | |
$starter .= ($quevoici?(" which you're".(int(rand(2))?"":int(rand(2))?" now":" currently")." reading"):""); | |
$starter .= " ".(int(rand(2))?"contains":"comprises"); | |
$starter .= " ".(int(rand(2))?"just":(int(rand(2))?"very ":"").(int(rand(2))?"exactly":"precisely")) if int(rand(3)); | |
$starter .= " "; | |
# Start trying with a bootstrap sentence: we use the one generated | |
# at previous loop... | |
my $str = $bootstrap; | |
for ( my $i=0 ; $i<26 ; $i++ ) { | |
# ...except that we randomly suppress some letters if they | |
# aren't in the starter blurb (to avoid getting stuck with | |
# something like "un b" if there is no 'b' anywhere else in | |
# the sentence). | |
my $c = chr(ord("a")+$i); | |
if ( int(rand(3)) && $starter !~ m/$c/ ) { | |
$str =~ s/$c//g; | |
} | |
} | |
# Now iterate describing the sentence until we loop: | |
my %revtab; # Track sentences met (at this iteration of main loop) | |
for ( my $cnt=0 ; ; $cnt++ ) { | |
$revtab{$str} = $cnt; | |
my $str2 = $starter . describe($str); | |
if ( $str2 eq $str ) { | |
# Success: sentence equals its description. | |
printf STDERR "\e[32mSuccess! Autodescriptive sentence follows:\e[0m\n"; | |
print "$str\n"; | |
exit 0; | |
} | |
if ( defined($revtab{$str2}) ) { | |
# We hit a loop: try with a different starter. | |
printf STDERR "\e[31mLoop %d->%d, giving up\e[0m\n", $revtab{$str2}, ($cnt+1); | |
printf STDERR "\e[31mFailed sentence was:\e[0m %s\n", $str2; | |
$bootstrap = $str2; | |
next MAINLOOP; | |
} | |
$str = $str2; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See https://twitter.com/gro_tsen/status/1462058281967493124 for discussion