Gro-Tsen/autodescriptive.pl

## autodescriptive.pl
#! /usr/local/bin/perl -w

# Find auto-descriptive sentences in French, such as the following:

# "Cette phrase comporte huit a, six c, sept d, vingt-cinq e, deux f, six g, six h, vingt-sept i, deux l, deux m, quinze n, huit o, neuf p, six q, dix r, dix-huit s, vingt-trois t, seize u, six v, treize x, quatre z, cinquante-six espaces, six traits d'union, une apostrophe, vingt-quatre virgules et un point."

# "Cette jolie phrase contient huit a, huit c, cinq d, vingt-sept e, quatre f, sept g, sept h, vingt-six i, deux j, trois l, une m, vingt-quatre n, dix o, onze p, huit q, neuf r, quinze s, vingt-neuf t, dix-huit u, sept v, cinq x, trois z, cinquante-neuf espaces, sept traits d'union, une apostrophe, vingt-cinq virgules et un point."

# To do this, we iterate replacing a sentence by its description
# until, hopefully, we reach a fixed point.  Generally this won't
# happen (we hit a loop instead).  When this happens, we randomly
# change the initial blurb and try iterating again with this new blurb
# to break out of the loop.

use strict;
use warnings;
use utf8;

use open IN => ':utf8';
binmode STDIN, ":utf8";
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";

my @units = ( "zéro", "un", "deux", "trois", "quatre",
	      "cinq", "six", "sept", "huit", "neuf" );
my @teens = ( "dix", "onze", "douze", "treize", "quatorze",
	      "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" );
my @tens = ( "", "dix", "vingt", "trente", "quarante",
	     "cinquante", "soixante", "septante", "huitante", "nonante" );

sub name100 {
    # Return the French name of $n (with $n<100), old (Swiss) style if
    # $oldstyle, feminine if $feminine.
    use integer;
    my $n = shift;
    my $oldstyle = shift;
    my $feminine = shift;
    die "Bad input to name100" unless $n >= 0 && $n < 100 && $n == ($n/10)*10+($n%10);
    if ( $n < 10 ) {
	return $units[$n] . ($feminine && ($n%10==1) ? "e" : "");
    } elsif ( $n < 20 ) {
	return $teens[$n%10];
    } elsif ( $oldstyle || $n < 70 ) {
	my $s = $tens[$n/10];
	if ( $n%10 == 1 ) {
	    $s .= " et ".$units[$n%10] . ($feminine ? "e" : "");
	} elsif ( $n%10 >= 2 ) {
	    $s .= "-".$units[$n%10];
	}
	return $s;
    } elsif ( $n < 80 ) {
	my $s = $tens[6];
	if ( $n%10 == 1 ) {
	    $s .= " et ".$teens[$n%10];
	} else {
	    $s .= "-".$teens[$n%10];
	}
	return $s;
    } else {
	my $s = "quatre-vingt";
	if ( $n%20 == 0 ) {
	    $s .= "s";
	} elsif ( $n%20 < 10 ) {
	    $s .= "-".$units[$n%10] . ($feminine && ($n%10==1) ? "e" : "");
	} else {
	    $s .= "-".$teens[$n%10];
	}
	return $s;
    }
}

sub name1000 {
    # Return the French name of $n (with $n<1000), old (Swiss) style if
    # $oldstyle, feminine if $feminine.
    use integer;
    my $n = shift;
    my $oldstyle = shift;
    my $feminine = shift;
    die "Bad input to name1000" unless $n >= 0 && $n < 1000 && $n == ($n/100)*100+($n%100);
    if ( $n < 100 ) {
	return name100($n,$oldstyle,$feminine);
    } elsif ( $n < 200 ) {
	my $s = "cent";
	$s .= " ".name100($n%100,$oldstyle,$feminine) if $n%100;
	return $s;
    } else {
	my $s = $units[$n/100] . " cent";
	if ( $n%100 == 0 ) {
	    $s .= "s";
	} else {
	    $s .= " ".name100($n%100,$oldstyle,$feminine);
	}
	return $s;
    }
}

sub countparts {
    # Return a reference to a hash counting the number of different
    # items in a sentence (mostly characters, but letters are
    # normalized to lowercase, and accents are counted separately).
    my $str = shift;
    my %t;
    for ( my $i=0 ; $i<length($str) ; $i++ ) {
	my $c = substr($str,$i,1);
	if ( $c ge "A" && $c le "Z" ) {
	    $c = chr(ord($c)-ord("A")+ord("a"));
	} elsif ( $c eq "é" || $c eq "É" ) {
	    $t{"´"}++;
	    $c = "e";
	} elsif ( $c eq "è" || $c eq "È" ) {
	    $t{"`"}++;
	    $c = "e";
	} elsif ( $c eq "ê" || $c eq "Ê" ) {
	    $t{"^"}++;
	    $c = "e";
	}
	# Add more cases as might occur!
	$t{$c}++;
    }
    return \%t;
}

my %letter_gender = (
    # Some letters are traditionally feminine: this marks them as such.
    a => 0,    b => 0,    c => 0,    d => 0,    e => 0,
    f => 1,    g => 0,    h => 1,    i => 0,    j => 0,
    k => 0,    l => 1,    m => 1,    n => 1,    o => 0,
    p => 0,    q => 0,    r => 1,    s => 1,    t => 0,
    u => 0,    v => 0,    w => 0,    x => 0,    y => 0,
    z => 0
);

sub describe {
    # Returns the full description of a sentence but without any
    # starter blurb (“Cette phrase contient”).
    my $str = shift;
    my $oldstyle = shift;     # Use old style (Swiss) number names.
    my $tradgenders = shift;  # Use traditional letter genders.
    my $tr = countparts($str);
    my @t;
    for ( my $i=0 ; $i<26 ; $i++ ) {
	my $c = chr(ord("a")+$i);
	push @t, (name1000($tr->{$c},$oldstyle,($tradgenders&&$letter_gender{$c})) . " " . $c)
	    if $tr->{$c};
    }
    push @t, (name1000($tr->{"´"},$oldstyle,0) . " " . ($tr->{"´"}>1?"accents aigus":"accent aigu"))
	if $tr->{"´"};
    push @t, (name1000($tr->{"`"},$oldstyle,0) . " " . ($tr->{"`"}>1?"accents graves":"accent grave"))
	if $tr->{"`"};
    push @t, (name1000($tr->{"^"},$oldstyle,0) . " " . ($tr->{"^"}>1?"accents circonflexes":"accent circonflexe"))
	if $tr->{"^"};
    push @t, (name1000($tr->{" "},$oldstyle,1) . " " . ($tr->{" "}>1?"espaces":"espace"))
	if $tr->{" "};
    push @t, (name1000($tr->{"-"},$oldstyle,0) . " " . ($tr->{"-"}>1?"traits d'union":"trait d'union"))
	if $tr->{"-"};
    push @t, (name1000($tr->{"'"},$oldstyle,1) . " " . ($tr->{"'"}>1?"apostrophes":"apostrophe"))
	if $tr->{"'"};
    push @t, (name1000($tr->{","},$oldstyle,1) . " " . ($tr->{","}>1?"virgules":"virgule"))
	if $tr->{","};
    push @t, (name1000($tr->{"."},$oldstyle,0) . " " . ($tr->{"."}>1?"points":"point"))
	if $tr->{"."};
    my $descr = "";
    for ( my $j=0 ; $j<scalar(@t) ; $j++ ) {
	$descr .= ($j==0?"":$j==scalar(@t)-1?" et ":", ") . $t[$j];
    }
    $descr .= ".";
    return $descr;
}

my $bootstrap = "";
MAINLOOP: while ( 1 ) {
    # We randomly select the starter blurb, whether numbers will be
    # written old style and whether to give letters their traditional
    # gender.
    my $oldstyle = int(rand(2));
    my $tradgenders = int(rand(2));
    my $starter;
    my $quevoici = int(rand(2));
    $starter = $quevoici?"La":"Cette";
    $starter .= " ".(int(rand(2))?"petite":"jolie") if int(rand(3));
    $starter .= " phrase";
    $starter .= " ".(int(rand(2))?"auto-":"auto").(int(rand(2))?"descriptive":"référentielle") if int(rand(2));
    $starter .= ($quevoici?" que".(int(rand(2))?" voici":" vous lisez"):"");
    $starter .= " ".(int(rand(2))?"contient":"comporte");
    $starter .= " ".(int(rand(2))?(int(rand(2))?"tout ":"")."juste":(int(rand(2))?"très ":"").(int(rand(2))?"exactement":"précisément")) if int(rand(3));
    $starter .= " ";
    # Start trying with a bootstrap sentence: we use the one generated
    # at previous loop...
    my $str = $bootstrap;
    for ( my $i=0 ; $i<26 ; $i++ ) {
	# ...except that we randomly suppress some letters if they
	# aren't in the starter blurb (to avoid getting stuck with
	# something like "un b" if there is no 'b' anywhere else in
	# the sentence).
	my $c = chr(ord("a")+$i);
	if ( int(rand(3)) && $starter !~ m/$c/ ) {
	    $str =~ s/$c//g;
	}
    }
    # Now iterate describing the sentence until we loop:
    my %revtab;  # Track sentences met (at this iteration of main loop)
    for ( my $cnt=0 ;  ; $cnt++ ) {
	$revtab{$str} = $cnt;
	my $str2 = $starter . describe($str, $oldstyle, $tradgenders);
	if ( $str2 eq $str ) {
	    # Success: sentence equals its description.
	    printf STDERR "\e[32mSuccess!  Autodescriptive sentence follows:\e[0m\n";
	    print "$str\n";
	    exit 0;
	}
	if ( defined($revtab{$str2}) ) {
	    # We hit a loop: try with a different starter.
	    printf STDERR "\e[31mLoop %d->%d, giving up\e[0m\n", $revtab{$str2}, ($cnt+1);
	    printf STDERR "\e[31mFailed sentence was:\e[0m %s\n", $str2;
	    $bootstrap = $str2;
	    next MAINLOOP;
	}
	$str = $str2;
    }
}
	#! /usr/local/bin/perl -w

	# Find auto-descriptive sentences in French, such as the following:

	# "Cette phrase comporte huit a, six c, sept d, vingt-cinq e, deux f, six g, six h, vingt-sept i, deux l, deux m, quinze n, huit o, neuf p, six q, dix r, dix-huit s, vingt-trois t, seize u, six v, treize x, quatre z, cinquante-six espaces, six traits d'union, une apostrophe, vingt-quatre virgules et un point."

	# "Cette jolie phrase contient huit a, huit c, cinq d, vingt-sept e, quatre f, sept g, sept h, vingt-six i, deux j, trois l, une m, vingt-quatre n, dix o, onze p, huit q, neuf r, quinze s, vingt-neuf t, dix-huit u, sept v, cinq x, trois z, cinquante-neuf espaces, sept traits d'union, une apostrophe, vingt-cinq virgules et un point."

	# To do this, we iterate replacing a sentence by its description
	# until, hopefully, we reach a fixed point. Generally this won't
	# happen (we hit a loop instead). When this happens, we randomly
	# change the initial blurb and try iterating again with this new blurb
	# to break out of the loop.

	use strict;
	use warnings;
	use utf8;

	use open IN => ':utf8';
	binmode STDIN, ":utf8";
	binmode STDERR, ":utf8";
	binmode STDOUT, ":utf8";

	my @units = ( "zéro", "un", "deux", "trois", "quatre",
	"cinq", "six", "sept", "huit", "neuf" );
	my @teens = ( "dix", "onze", "douze", "treize", "quatorze",
	"quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" );
	my @tens = ( "", "dix", "vingt", "trente", "quarante",
	"cinquante", "soixante", "septante", "huitante", "nonante" );

	sub name100 {
	# Return the French name of $n (with $n<100), old (Swiss) style if
	# $oldstyle, feminine if $feminine.
	use integer;
	my $n = shift;
	my $oldstyle = shift;
	my $feminine = shift;
	die "Bad input to name100" unless $n >= 0 && $n < 100 && $n == ($n/10)*10+($n%10);
	if ( $n < 10 ) {
	return $units[$n] . ($feminine && ($n%10==1) ? "e" : "");
	} elsif ( $n < 20 ) {
	return $teens[$n%10];
	} elsif ( $oldstyle \|\| $n < 70 ) {
	my $s = $tens[$n/10];
	if ( $n%10 == 1 ) {
	$s .= " et ".$units[$n%10] . ($feminine ? "e" : "");
	} elsif ( $n%10 >= 2 ) {
	$s .= "-".$units[$n%10];
	}
	return $s;
	} elsif ( $n < 80 ) {
	my $s = $tens[6];
	if ( $n%10 == 1 ) {
	$s .= " et ".$teens[$n%10];
	} else {
	$s .= "-".$teens[$n%10];
	}
	return $s;
	} else {
	my $s = "quatre-vingt";
	if ( $n%20 == 0 ) {
	$s .= "s";
	} elsif ( $n%20 < 10 ) {
	$s .= "-".$units[$n%10] . ($feminine && ($n%10==1) ? "e" : "");
	} else {
	$s .= "-".$teens[$n%10];
	}
	return $s;
	}
	}

	sub name1000 {
	# Return the French name of $n (with $n<1000), old (Swiss) style if
	# $oldstyle, feminine if $feminine.
	use integer;
	my $n = shift;
	my $oldstyle = shift;
	my $feminine = shift;
	die "Bad input to name1000" unless $n >= 0 && $n < 1000 && $n == ($n/100)*100+($n%100);
	if ( $n < 100 ) {
	return name100($n,$oldstyle,$feminine);
	} elsif ( $n < 200 ) {
	my $s = "cent";
	$s .= " ".name100($n%100,$oldstyle,$feminine) if $n%100;
	return $s;
	} else {
	my $s = $units[$n/100] . " cent";
	if ( $n%100 == 0 ) {
	$s .= "s";
	} else {
	$s .= " ".name100($n%100,$oldstyle,$feminine);
	}
	return $s;
	}
	}

	sub countparts {
	# Return a reference to a hash counting the number of different
	# items in a sentence (mostly characters, but letters are
	# normalized to lowercase, and accents are counted separately).
	my $str = shift;
	my %t;
	for ( my $i=0 ; $i<length($str) ; $i++ ) {
	my $c = substr($str,$i,1);
	if ( $c ge "A" && $c le "Z" ) {
	$c = chr(ord($c)-ord("A")+ord("a"));
	} elsif ( $c eq "é" \|\| $c eq "É" ) {
	$t{"´"}++;
	$c = "e";
	} elsif ( $c eq "è" \|\| $c eq "È" ) {
	$t{"`"}++;
	$c = "e";
	} elsif ( $c eq "ê" \|\| $c eq "Ê" ) {
	$t{"^"}++;
	$c = "e";
	}
	# Add more cases as might occur!
	$t{$c}++;
	}
	return \%t;
	}

	my %letter_gender = (
	# Some letters are traditionally feminine: this marks them as such.
	a => 0, b => 0, c => 0, d => 0, e => 0,
	f => 1, g => 0, h => 1, i => 0, j => 0,
	k => 0, l => 1, m => 1, n => 1, o => 0,
	p => 0, q => 0, r => 1, s => 1, t => 0,
	u => 0, v => 0, w => 0, x => 0, y => 0,
	z => 0
	);

	sub describe {
	# Returns the full description of a sentence but without any
	# starter blurb (“Cette phrase contient”).
	my $str = shift;
	my $oldstyle = shift; # Use old style (Swiss) number names.
	my $tradgenders = shift; # Use traditional letter genders.
	my $tr = countparts($str);
	my @t;
	for ( my $i=0 ; $i<26 ; $i++ ) {
	my $c = chr(ord("a")+$i);
	push @t, (name1000($tr->{$c},$oldstyle,($tradgenders&&$letter_gender{$c})) . " " . $c)
	if $tr->{$c};
	}
	push @t, (name1000($tr->{"´"},$oldstyle,0) . " " . ($tr->{"´"}>1?"accents aigus":"accent aigu"))
	if $tr->{"´"};
	push @t, (name1000($tr->{"`"},$oldstyle,0) . " " . ($tr->{"`"}>1?"accents graves":"accent grave"))
	if $tr->{"`"};
	push @t, (name1000($tr->{"^"},$oldstyle,0) . " " . ($tr->{"^"}>1?"accents circonflexes":"accent circonflexe"))
	if $tr->{"^"};
	push @t, (name1000($tr->{" "},$oldstyle,1) . " " . ($tr->{" "}>1?"espaces":"espace"))
	if $tr->{" "};
	push @t, (name1000($tr->{"-"},$oldstyle,0) . " " . ($tr->{"-"}>1?"traits d'union":"trait d'union"))
	if $tr->{"-"};
	push @t, (name1000($tr->{"'"},$oldstyle,1) . " " . ($tr->{"'"}>1?"apostrophes":"apostrophe"))
	if $tr->{"'"};
	push @t, (name1000($tr->{","},$oldstyle,1) . " " . ($tr->{","}>1?"virgules":"virgule"))
	if $tr->{","};
	push @t, (name1000($tr->{"."},$oldstyle,0) . " " . ($tr->{"."}>1?"points":"point"))
	if $tr->{"."};
	my $descr = "";
	for ( my $j=0 ; $j<scalar(@t) ; $j++ ) {
	$descr .= ($j==0?"":$j==scalar(@t)-1?" et ":", ") . $t[$j];
	}
	$descr .= ".";
	return $descr;
	}

	my $bootstrap = "";
	MAINLOOP: while ( 1 ) {
	# We randomly select the starter blurb, whether numbers will be
	# written old style and whether to give letters their traditional
	# gender.
	my $oldstyle = int(rand(2));
	my $tradgenders = int(rand(2));
	my $starter;
	my $quevoici = int(rand(2));
	$starter = $quevoici?"La":"Cette";
	$starter .= " ".(int(rand(2))?"petite":"jolie") if int(rand(3));
	$starter .= " phrase";
	$starter .= " ".(int(rand(2))?"auto-":"auto").(int(rand(2))?"descriptive":"référentielle") if int(rand(2));
	$starter .= ($quevoici?" que".(int(rand(2))?" voici":" vous lisez"):"");
	$starter .= " ".(int(rand(2))?"contient":"comporte");
	$starter .= " ".(int(rand(2))?(int(rand(2))?"tout ":"")."juste":(int(rand(2))?"très ":"").(int(rand(2))?"exactement":"précisément")) if int(rand(3));
	$starter .= " ";
	# Start trying with a bootstrap sentence: we use the one generated
	# at previous loop...
	my $str = $bootstrap;
	for ( my $i=0 ; $i<26 ; $i++ ) {
	# ...except that we randomly suppress some letters if they
	# aren't in the starter blurb (to avoid getting stuck with
	# something like "un b" if there is no 'b' anywhere else in
	# the sentence).
	my $c = chr(ord("a")+$i);
	if ( int(rand(3)) && $starter !~ m/$c/ ) {
	$str =~ s/$c//g;
	}
	}
	# Now iterate describing the sentence until we loop:
	my %revtab; # Track sentences met (at this iteration of main loop)
	for ( my $cnt=0 ; ; $cnt++ ) {
	$revtab{$str} = $cnt;
	my $str2 = $starter . describe($str, $oldstyle, $tradgenders);
	if ( $str2 eq $str ) {
	# Success: sentence equals its description.
	printf STDERR "\e[32mSuccess! Autodescriptive sentence follows:\e[0m\n";
	print "$str\n";
	exit 0;
	}
	if ( defined($revtab{$str2}) ) {
	# We hit a loop: try with a different starter.
	printf STDERR "\e[31mLoop %d->%d, giving up\e[0m\n", $revtab{$str2}, ($cnt+1);
	printf STDERR "\e[31mFailed sentence was:\e[0m %s\n", $str2;
	$bootstrap = $str2;
	next MAINLOOP;
	}
	$str = $str2;
	}
	}