Skip to content

Instantly share code, notes, and snippets.

@Gro-Tsen
Created November 18, 2021 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gro-Tsen/32de157a39542b93ba790d550dd35e3b to your computer and use it in GitHub Desktop.
Save Gro-Tsen/32de157a39542b93ba790d550dd35e3b to your computer and use it in GitHub Desktop.
#! /usr/local/bin/perl -w
# Find auto-descriptive sentences in French, such as the following:
# "Cette phrase comporte huit a, six c, sept d, vingt-cinq e, deux f, six g, six h, vingt-sept i, deux l, deux m, quinze n, huit o, neuf p, six q, dix r, dix-huit s, vingt-trois t, seize u, six v, treize x, quatre z, cinquante-six espaces, six traits d'union, une apostrophe, vingt-quatre virgules et un point."
# "Cette jolie phrase contient huit a, huit c, cinq d, vingt-sept e, quatre f, sept g, sept h, vingt-six i, deux j, trois l, une m, vingt-quatre n, dix o, onze p, huit q, neuf r, quinze s, vingt-neuf t, dix-huit u, sept v, cinq x, trois z, cinquante-neuf espaces, sept traits d'union, une apostrophe, vingt-cinq virgules et un point."
# To do this, we iterate replacing a sentence by its description
# until, hopefully, we reach a fixed point. Generally this won't
# happen (we hit a loop instead). When this happens, we randomly
# change the initial blurb and try iterating again with this new blurb
# to break out of the loop.
use strict;
use warnings;
use utf8;
use open IN => ':utf8';
binmode STDIN, ":utf8";
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
my @units = ( "zéro", "un", "deux", "trois", "quatre",
"cinq", "six", "sept", "huit", "neuf" );
my @teens = ( "dix", "onze", "douze", "treize", "quatorze",
"quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" );
my @tens = ( "", "dix", "vingt", "trente", "quarante",
"cinquante", "soixante", "septante", "huitante", "nonante" );
sub name100 {
# Return the French name of $n (with $n<100), old (Swiss) style if
# $oldstyle, feminine if $feminine.
use integer;
my $n = shift;
my $oldstyle = shift;
my $feminine = shift;
die "Bad input to name100" unless $n >= 0 && $n < 100 && $n == ($n/10)*10+($n%10);
if ( $n < 10 ) {
return $units[$n] . ($feminine && ($n%10==1) ? "e" : "");
} elsif ( $n < 20 ) {
return $teens[$n%10];
} elsif ( $oldstyle || $n < 70 ) {
my $s = $tens[$n/10];
if ( $n%10 == 1 ) {
$s .= " et ".$units[$n%10] . ($feminine ? "e" : "");
} elsif ( $n%10 >= 2 ) {
$s .= "-".$units[$n%10];
}
return $s;
} elsif ( $n < 80 ) {
my $s = $tens[6];
if ( $n%10 == 1 ) {
$s .= " et ".$teens[$n%10];
} else {
$s .= "-".$teens[$n%10];
}
return $s;
} else {
my $s = "quatre-vingt";
if ( $n%20 == 0 ) {
$s .= "s";
} elsif ( $n%20 < 10 ) {
$s .= "-".$units[$n%10] . ($feminine && ($n%10==1) ? "e" : "");
} else {
$s .= "-".$teens[$n%10];
}
return $s;
}
}
sub name1000 {
# Return the French name of $n (with $n<1000), old (Swiss) style if
# $oldstyle, feminine if $feminine.
use integer;
my $n = shift;
my $oldstyle = shift;
my $feminine = shift;
die "Bad input to name1000" unless $n >= 0 && $n < 1000 && $n == ($n/100)*100+($n%100);
if ( $n < 100 ) {
return name100($n,$oldstyle,$feminine);
} elsif ( $n < 200 ) {
my $s = "cent";
$s .= " ".name100($n%100,$oldstyle,$feminine) if $n%100;
return $s;
} else {
my $s = $units[$n/100] . " cent";
if ( $n%100 == 0 ) {
$s .= "s";
} else {
$s .= " ".name100($n%100,$oldstyle,$feminine);
}
return $s;
}
}
sub countparts {
# Return a reference to a hash counting the number of different
# items in a sentence (mostly characters, but letters are
# normalized to lowercase, and accents are counted separately).
my $str = shift;
my %t;
for ( my $i=0 ; $i<length($str) ; $i++ ) {
my $c = substr($str,$i,1);
if ( $c ge "A" && $c le "Z" ) {
$c = chr(ord($c)-ord("A")+ord("a"));
} elsif ( $c eq "é" || $c eq "É" ) {
$t{"´"}++;
$c = "e";
} elsif ( $c eq "è" || $c eq "È" ) {
$t{"`"}++;
$c = "e";
} elsif ( $c eq "ê" || $c eq "Ê" ) {
$t{"^"}++;
$c = "e";
}
# Add more cases as might occur!
$t{$c}++;
}
return \%t;
}
my %letter_gender = (
# Some letters are traditionally feminine: this marks them as such.
a => 0, b => 0, c => 0, d => 0, e => 0,
f => 1, g => 0, h => 1, i => 0, j => 0,
k => 0, l => 1, m => 1, n => 1, o => 0,
p => 0, q => 0, r => 1, s => 1, t => 0,
u => 0, v => 0, w => 0, x => 0, y => 0,
z => 0
);
sub describe {
# Returns the full description of a sentence but without any
# starter blurb (“Cette phrase contient”).
my $str = shift;
my $oldstyle = shift; # Use old style (Swiss) number names.
my $tradgenders = shift; # Use traditional letter genders.
my $tr = countparts($str);
my @t;
for ( my $i=0 ; $i<26 ; $i++ ) {
my $c = chr(ord("a")+$i);
push @t, (name1000($tr->{$c},$oldstyle,($tradgenders&&$letter_gender{$c})) . " " . $c)
if $tr->{$c};
}
push @t, (name1000($tr->{"´"},$oldstyle,0) . " " . ($tr->{"´"}>1?"accents aigus":"accent aigu"))
if $tr->{"´"};
push @t, (name1000($tr->{"`"},$oldstyle,0) . " " . ($tr->{"`"}>1?"accents graves":"accent grave"))
if $tr->{"`"};
push @t, (name1000($tr->{"^"},$oldstyle,0) . " " . ($tr->{"^"}>1?"accents circonflexes":"accent circonflexe"))
if $tr->{"^"};
push @t, (name1000($tr->{" "},$oldstyle,1) . " " . ($tr->{" "}>1?"espaces":"espace"))
if $tr->{" "};
push @t, (name1000($tr->{"-"},$oldstyle,0) . " " . ($tr->{"-"}>1?"traits d'union":"trait d'union"))
if $tr->{"-"};
push @t, (name1000($tr->{"'"},$oldstyle,1) . " " . ($tr->{"'"}>1?"apostrophes":"apostrophe"))
if $tr->{"'"};
push @t, (name1000($tr->{","},$oldstyle,1) . " " . ($tr->{","}>1?"virgules":"virgule"))
if $tr->{","};
push @t, (name1000($tr->{"."},$oldstyle,0) . " " . ($tr->{"."}>1?"points":"point"))
if $tr->{"."};
my $descr = "";
for ( my $j=0 ; $j<scalar(@t) ; $j++ ) {
$descr .= ($j==0?"":$j==scalar(@t)-1?" et ":", ") . $t[$j];
}
$descr .= ".";
return $descr;
}
my $bootstrap = "";
MAINLOOP: while ( 1 ) {
# We randomly select the starter blurb, whether numbers will be
# written old style and whether to give letters their traditional
# gender.
my $oldstyle = int(rand(2));
my $tradgenders = int(rand(2));
my $starter;
my $quevoici = int(rand(2));
$starter = $quevoici?"La":"Cette";
$starter .= " ".(int(rand(2))?"petite":"jolie") if int(rand(3));
$starter .= " phrase";
$starter .= " ".(int(rand(2))?"auto-":"auto").(int(rand(2))?"descriptive":"référentielle") if int(rand(2));
$starter .= ($quevoici?" que".(int(rand(2))?" voici":" vous lisez"):"");
$starter .= " ".(int(rand(2))?"contient":"comporte");
$starter .= " ".(int(rand(2))?(int(rand(2))?"tout ":"")."juste":(int(rand(2))?"très ":"").(int(rand(2))?"exactement":"précisément")) if int(rand(3));
$starter .= " ";
# Start trying with a bootstrap sentence: we use the one generated
# at previous loop...
my $str = $bootstrap;
for ( my $i=0 ; $i<26 ; $i++ ) {
# ...except that we randomly suppress some letters if they
# aren't in the starter blurb (to avoid getting stuck with
# something like "un b" if there is no 'b' anywhere else in
# the sentence).
my $c = chr(ord("a")+$i);
if ( int(rand(3)) && $starter !~ m/$c/ ) {
$str =~ s/$c//g;
}
}
# Now iterate describing the sentence until we loop:
my %revtab; # Track sentences met (at this iteration of main loop)
for ( my $cnt=0 ; ; $cnt++ ) {
$revtab{$str} = $cnt;
my $str2 = $starter . describe($str, $oldstyle, $tradgenders);
if ( $str2 eq $str ) {
# Success: sentence equals its description.
printf STDERR "\e[32mSuccess! Autodescriptive sentence follows:\e[0m\n";
print "$str\n";
exit 0;
}
if ( defined($revtab{$str2}) ) {
# We hit a loop: try with a different starter.
printf STDERR "\e[31mLoop %d->%d, giving up\e[0m\n", $revtab{$str2}, ($cnt+1);
printf STDERR "\e[31mFailed sentence was:\e[0m %s\n", $str2;
$bootstrap = $str2;
next MAINLOOP;
}
$str = $str2;
}
}
@Gro-Tsen
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment