Created
November 18, 2021 17:58
-
-
Save Gro-Tsen/32de157a39542b93ba790d550dd35e3b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/perl -w | |
# Find auto-descriptive sentences in French, such as the following: | |
# "Cette phrase comporte huit a, six c, sept d, vingt-cinq e, deux f, six g, six h, vingt-sept i, deux l, deux m, quinze n, huit o, neuf p, six q, dix r, dix-huit s, vingt-trois t, seize u, six v, treize x, quatre z, cinquante-six espaces, six traits d'union, une apostrophe, vingt-quatre virgules et un point." | |
# "Cette jolie phrase contient huit a, huit c, cinq d, vingt-sept e, quatre f, sept g, sept h, vingt-six i, deux j, trois l, une m, vingt-quatre n, dix o, onze p, huit q, neuf r, quinze s, vingt-neuf t, dix-huit u, sept v, cinq x, trois z, cinquante-neuf espaces, sept traits d'union, une apostrophe, vingt-cinq virgules et un point." | |
# To do this, we iterate replacing a sentence by its description | |
# until, hopefully, we reach a fixed point. Generally this won't | |
# happen (we hit a loop instead). When this happens, we randomly | |
# change the initial blurb and try iterating again with this new blurb | |
# to break out of the loop. | |
use strict; | |
use warnings; | |
use utf8; | |
use open IN => ':utf8'; | |
binmode STDIN, ":utf8"; | |
binmode STDERR, ":utf8"; | |
binmode STDOUT, ":utf8"; | |
my @units = ( "zéro", "un", "deux", "trois", "quatre", | |
"cinq", "six", "sept", "huit", "neuf" ); | |
my @teens = ( "dix", "onze", "douze", "treize", "quatorze", | |
"quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" ); | |
my @tens = ( "", "dix", "vingt", "trente", "quarante", | |
"cinquante", "soixante", "septante", "huitante", "nonante" ); | |
sub name100 { | |
# Return the French name of $n (with $n<100), old (Swiss) style if | |
# $oldstyle, feminine if $feminine. | |
use integer; | |
my $n = shift; | |
my $oldstyle = shift; | |
my $feminine = shift; | |
die "Bad input to name100" unless $n >= 0 && $n < 100 && $n == ($n/10)*10+($n%10); | |
if ( $n < 10 ) { | |
return $units[$n] . ($feminine && ($n%10==1) ? "e" : ""); | |
} elsif ( $n < 20 ) { | |
return $teens[$n%10]; | |
} elsif ( $oldstyle || $n < 70 ) { | |
my $s = $tens[$n/10]; | |
if ( $n%10 == 1 ) { | |
$s .= " et ".$units[$n%10] . ($feminine ? "e" : ""); | |
} elsif ( $n%10 >= 2 ) { | |
$s .= "-".$units[$n%10]; | |
} | |
return $s; | |
} elsif ( $n < 80 ) { | |
my $s = $tens[6]; | |
if ( $n%10 == 1 ) { | |
$s .= " et ".$teens[$n%10]; | |
} else { | |
$s .= "-".$teens[$n%10]; | |
} | |
return $s; | |
} else { | |
my $s = "quatre-vingt"; | |
if ( $n%20 == 0 ) { | |
$s .= "s"; | |
} elsif ( $n%20 < 10 ) { | |
$s .= "-".$units[$n%10] . ($feminine && ($n%10==1) ? "e" : ""); | |
} else { | |
$s .= "-".$teens[$n%10]; | |
} | |
return $s; | |
} | |
} | |
sub name1000 { | |
# Return the French name of $n (with $n<1000), old (Swiss) style if | |
# $oldstyle, feminine if $feminine. | |
use integer; | |
my $n = shift; | |
my $oldstyle = shift; | |
my $feminine = shift; | |
die "Bad input to name1000" unless $n >= 0 && $n < 1000 && $n == ($n/100)*100+($n%100); | |
if ( $n < 100 ) { | |
return name100($n,$oldstyle,$feminine); | |
} elsif ( $n < 200 ) { | |
my $s = "cent"; | |
$s .= " ".name100($n%100,$oldstyle,$feminine) if $n%100; | |
return $s; | |
} else { | |
my $s = $units[$n/100] . " cent"; | |
if ( $n%100 == 0 ) { | |
$s .= "s"; | |
} else { | |
$s .= " ".name100($n%100,$oldstyle,$feminine); | |
} | |
return $s; | |
} | |
} | |
sub countparts { | |
# Return a reference to a hash counting the number of different | |
# items in a sentence (mostly characters, but letters are | |
# normalized to lowercase, and accents are counted separately). | |
my $str = shift; | |
my %t; | |
for ( my $i=0 ; $i<length($str) ; $i++ ) { | |
my $c = substr($str,$i,1); | |
if ( $c ge "A" && $c le "Z" ) { | |
$c = chr(ord($c)-ord("A")+ord("a")); | |
} elsif ( $c eq "é" || $c eq "É" ) { | |
$t{"´"}++; | |
$c = "e"; | |
} elsif ( $c eq "è" || $c eq "È" ) { | |
$t{"`"}++; | |
$c = "e"; | |
} elsif ( $c eq "ê" || $c eq "Ê" ) { | |
$t{"^"}++; | |
$c = "e"; | |
} | |
# Add more cases as might occur! | |
$t{$c}++; | |
} | |
return \%t; | |
} | |
my %letter_gender = ( | |
# Some letters are traditionally feminine: this marks them as such. | |
a => 0, b => 0, c => 0, d => 0, e => 0, | |
f => 1, g => 0, h => 1, i => 0, j => 0, | |
k => 0, l => 1, m => 1, n => 1, o => 0, | |
p => 0, q => 0, r => 1, s => 1, t => 0, | |
u => 0, v => 0, w => 0, x => 0, y => 0, | |
z => 0 | |
); | |
sub describe { | |
# Returns the full description of a sentence but without any | |
# starter blurb (“Cette phrase contient”). | |
my $str = shift; | |
my $oldstyle = shift; # Use old style (Swiss) number names. | |
my $tradgenders = shift; # Use traditional letter genders. | |
my $tr = countparts($str); | |
my @t; | |
for ( my $i=0 ; $i<26 ; $i++ ) { | |
my $c = chr(ord("a")+$i); | |
push @t, (name1000($tr->{$c},$oldstyle,($tradgenders&&$letter_gender{$c})) . " " . $c) | |
if $tr->{$c}; | |
} | |
push @t, (name1000($tr->{"´"},$oldstyle,0) . " " . ($tr->{"´"}>1?"accents aigus":"accent aigu")) | |
if $tr->{"´"}; | |
push @t, (name1000($tr->{"`"},$oldstyle,0) . " " . ($tr->{"`"}>1?"accents graves":"accent grave")) | |
if $tr->{"`"}; | |
push @t, (name1000($tr->{"^"},$oldstyle,0) . " " . ($tr->{"^"}>1?"accents circonflexes":"accent circonflexe")) | |
if $tr->{"^"}; | |
push @t, (name1000($tr->{" "},$oldstyle,1) . " " . ($tr->{" "}>1?"espaces":"espace")) | |
if $tr->{" "}; | |
push @t, (name1000($tr->{"-"},$oldstyle,0) . " " . ($tr->{"-"}>1?"traits d'union":"trait d'union")) | |
if $tr->{"-"}; | |
push @t, (name1000($tr->{"'"},$oldstyle,1) . " " . ($tr->{"'"}>1?"apostrophes":"apostrophe")) | |
if $tr->{"'"}; | |
push @t, (name1000($tr->{","},$oldstyle,1) . " " . ($tr->{","}>1?"virgules":"virgule")) | |
if $tr->{","}; | |
push @t, (name1000($tr->{"."},$oldstyle,0) . " " . ($tr->{"."}>1?"points":"point")) | |
if $tr->{"."}; | |
my $descr = ""; | |
for ( my $j=0 ; $j<scalar(@t) ; $j++ ) { | |
$descr .= ($j==0?"":$j==scalar(@t)-1?" et ":", ") . $t[$j]; | |
} | |
$descr .= "."; | |
return $descr; | |
} | |
my $bootstrap = ""; | |
MAINLOOP: while ( 1 ) { | |
# We randomly select the starter blurb, whether numbers will be | |
# written old style and whether to give letters their traditional | |
# gender. | |
my $oldstyle = int(rand(2)); | |
my $tradgenders = int(rand(2)); | |
my $starter; | |
my $quevoici = int(rand(2)); | |
$starter = $quevoici?"La":"Cette"; | |
$starter .= " ".(int(rand(2))?"petite":"jolie") if int(rand(3)); | |
$starter .= " phrase"; | |
$starter .= " ".(int(rand(2))?"auto-":"auto").(int(rand(2))?"descriptive":"référentielle") if int(rand(2)); | |
$starter .= ($quevoici?" que".(int(rand(2))?" voici":" vous lisez"):""); | |
$starter .= " ".(int(rand(2))?"contient":"comporte"); | |
$starter .= " ".(int(rand(2))?(int(rand(2))?"tout ":"")."juste":(int(rand(2))?"très ":"").(int(rand(2))?"exactement":"précisément")) if int(rand(3)); | |
$starter .= " "; | |
# Start trying with a bootstrap sentence: we use the one generated | |
# at previous loop... | |
my $str = $bootstrap; | |
for ( my $i=0 ; $i<26 ; $i++ ) { | |
# ...except that we randomly suppress some letters if they | |
# aren't in the starter blurb (to avoid getting stuck with | |
# something like "un b" if there is no 'b' anywhere else in | |
# the sentence). | |
my $c = chr(ord("a")+$i); | |
if ( int(rand(3)) && $starter !~ m/$c/ ) { | |
$str =~ s/$c//g; | |
} | |
} | |
# Now iterate describing the sentence until we loop: | |
my %revtab; # Track sentences met (at this iteration of main loop) | |
for ( my $cnt=0 ; ; $cnt++ ) { | |
$revtab{$str} = $cnt; | |
my $str2 = $starter . describe($str, $oldstyle, $tradgenders); | |
if ( $str2 eq $str ) { | |
# Success: sentence equals its description. | |
printf STDERR "\e[32mSuccess! Autodescriptive sentence follows:\e[0m\n"; | |
print "$str\n"; | |
exit 0; | |
} | |
if ( defined($revtab{$str2}) ) { | |
# We hit a loop: try with a different starter. | |
printf STDERR "\e[31mLoop %d->%d, giving up\e[0m\n", $revtab{$str2}, ($cnt+1); | |
printf STDERR "\e[31mFailed sentence was:\e[0m %s\n", $str2; | |
$bootstrap = $str2; | |
next MAINLOOP; | |
} | |
$str = $str2; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Voir https://twitter.com/gro_tsen/status/1461408243910066176 pour une discussion