akovbovich/myfmt Secret

## myfmt
#!/usr/bin/env perl

##############################################################################
#                                                                            #
# run: perl myfmt input-file output-file                                     #
#                                                                            #
# (c) Andrey Kovbovich < akovbovich+tolstoy < at > gmail.com >, 2014         #
#                                                                            #
##############################################################################

use strict;
use warnings;
use List::Util 'reduce';

# --------------------------------- settings ---------------------------------

my $N         = 80;          # line width in columns
my $pad       = 4;           # new paragraph padding
my $N2        = $N/2;        # header max length

die "invalid N:$N, expected [20-120]\n"
   unless 20 <= $N and $N <= 120;

die "invalid padding\n"
   unless $pad < $N;

die <<"" unless @ARGV == 2;
usage: perl $0 input-file output-file\n

# ----------------------------------- defs -----------------------------------

my ($tag, $word, $len) =
   (   0,     1,    2);

my ($word_tag, $lf_tag, $eof_tag) =
   (        0,       1,        2);

# ----------------------------------- aux ------------------------------------
our ($a, $b);

sub justify {
   my ($ofs, @line) = @_;

   my $nwords = scalar @line;
   my $nchars = reduce { $a + $b->[$len] } ($nwords - 1) + $ofs, @line;
   my $nspace = $N - $nchars;
   my @padmap = (0, map { 1 } 1 .. $nwords - 1);

   for (my $n = 1; $nwords > 1 and $n <= $nspace; $n++) {
      $padmap[-(1 + ($n - 1) % ($nwords - 1))] ++;
   }

   my $s = '';
   for (my $i = 0; $i < $nwords; $i++) {
      $s .= ' ' x $padmap[$i] . $line[$i][$word];
   }

   $s . "\n";
}

sub cat {
   my @words = @_;
   join ' ', map { $_->[$word] } @words;
}

sub fmt3 {
   my ($s, $ofs, @words) = @_;

   for (my ($ncol, @line) = ($N-$ofs,); $ncol >= 0;) {
      $ncol-- if @line;
      if (my $elt = shift @words) {
         if ($ncol >= $elt->[$len]) {
            push @line, $elt;
            $ncol -= $elt->[$len];
         }
         else {
            unshift @words, $elt;
            $s .= justify($ofs, @line);
            @_ = ($s, 0, @words); goto &fmt3;
         }
      }
      else {
         $s .= cat(@line) . "\n";
         last;
      }
   }

   $s;
}

sub fmt {
   my $buf = shift;
   return '' unless
      my @words = grep { $_->[$tag] == $word_tag } @{$buf->{words}};

   my $s = ' ' x $pad;

   if ($buf->{nchars} + $buf->{nspace} + $pad < $N2) {
      return
         $s . cat(@words) . "\n";
   }

   fmt3($s, length $s, @words);
}

sub is_para {
   my $words = shift;

   return 1 if $$words[-1][$tag] == $eof_tag;

   if (@$words >= 2) {
      return 1 if
         $$words[-2][$tag] == $word_tag and
         $$words[-1][$tag] == $lf_tag   and
         $$words[-2][$word] =~ /[\.\!\?]$/;
   }

   0;
}

sub mk_buf {
   +{
      nchars => 0,
      nspace => 0,
      words  => [],
    };
}

# ----------------------------------- main ----------------------------------

open my $in,  '<:utf8', $ARGV[0] or die "cannot open $ARGV[0]: $!\n";
open my $out, '>:utf8', $ARGV[1] or die "cannot open $ARGV[1]: $!\n";

my $buf = mk_buf;

while (<$in>) {
   chomp;
   unless (my @tokens = split /\s+/) {
      print $out fmt($buf), "\n";
      $buf = mk_buf;
   }
   else {
      for (@tokens) {
         $buf->{nchars} += length;
         $buf->{nspace} += 1 if @{$buf->{words}};
         push @{$buf->{words}}, [$word_tag, $_, length];
      }
      push @{$buf->{words}}, [eof() ? $eof_tag : $lf_tag];
      if (is_para($buf->{words})) {
         print $out fmt($buf);
         $buf = mk_buf;
      }
   }
}
	#!/usr/bin/env perl

	##############################################################################
	# #
	# run: perl myfmt input-file output-file #
	# #
	# (c) Andrey Kovbovich < akovbovich+tolstoy < at > gmail.com >, 2014 #
	# #
	##############################################################################

	use strict;
	use warnings;
	use List::Util 'reduce';

	# --------------------------------- settings ---------------------------------

	my $N = 80; # line width in columns
	my $pad = 4; # new paragraph padding
	my $N2 = $N/2; # header max length

	die "invalid N:$N, expected [20-120]\n"
	unless 20 <= $N and $N <= 120;

	die "invalid padding\n"
	unless $pad < $N;

	die <<"" unless @ARGV == 2;
	usage: perl $0 input-file output-file\n

	# ----------------------------------- defs -----------------------------------

	my ($tag, $word, $len) =
	( 0, 1, 2);

	my ($word_tag, $lf_tag, $eof_tag) =
	( 0, 1, 2);

	# ----------------------------------- aux ------------------------------------
	our ($a, $b);

	sub justify {
	my ($ofs, @line) = @_;

	my $nwords = scalar @line;
	my $nchars = reduce { $a + $b->[$len] } ($nwords - 1) + $ofs, @line;
	my $nspace = $N - $nchars;
	my @padmap = (0, map { 1 } 1 .. $nwords - 1);

	for (my $n = 1; $nwords > 1 and $n <= $nspace; $n++) {
	$padmap[-(1 + ($n - 1) % ($nwords - 1))] ++;
	}

	my $s = '';
	for (my $i = 0; $i < $nwords; $i++) {
	$s .= ' ' x $padmap[$i] . $line[$i][$word];
	}

	$s . "\n";
	}

	sub cat {
	my @words = @_;
	join ' ', map { $_->[$word] } @words;
	}

	sub fmt3 {
	my ($s, $ofs, @words) = @_;

	for (my ($ncol, @line) = ($N-$ofs,); $ncol >= 0;) {
	$ncol-- if @line;
	if (my $elt = shift @words) {
	if ($ncol >= $elt->[$len]) {
	push @line, $elt;
	$ncol -= $elt->[$len];
	}
	else {
	unshift @words, $elt;
	$s .= justify($ofs, @line);
	@_ = ($s, 0, @words); goto &fmt3;
	}
	}
	else {
	$s .= cat(@line) . "\n";
	last;
	}
	}

	$s;
	}

	sub fmt {
	my $buf = shift;
	return '' unless
	my @words = grep { $_->[$tag] == $word_tag } @{$buf->{words}};

	my $s = ' ' x $pad;

	if ($buf->{nchars} + $buf->{nspace} + $pad < $N2) {
	return
	$s . cat(@words) . "\n";
	}

	fmt3($s, length $s, @words);
	}

	sub is_para {
	my $words = shift;

	return 1 if $$words[-1][$tag] == $eof_tag;

	if (@$words >= 2) {
	return 1 if
	$$words[-2][$tag] == $word_tag and
	$$words[-1][$tag] == $lf_tag and
	$$words[-2][$word] =~ /[\.\!\?]$/;
	}

	0;
	}

	sub mk_buf {
	+{
	nchars => 0,
	nspace => 0,
	words => [],
	};
	}

	# ----------------------------------- main ----------------------------------

	open my $in, '<:utf8', $ARGV[0] or die "cannot open $ARGV[0]: $!\n";
	open my $out, '>:utf8', $ARGV[1] or die "cannot open $ARGV[1]: $!\n";

	my $buf = mk_buf;

	while (<$in>) {
	chomp;
	unless (my @tokens = split /\s+/) {
	print $out fmt($buf), "\n";
	$buf = mk_buf;
	}
	else {
	for (@tokens) {
	$buf->{nchars} += length;
	$buf->{nspace} += 1 if @{$buf->{words}};
	push @{$buf->{words}}, [$word_tag, $_, length];
	}
	push @{$buf->{words}}, [eof() ? $eof_tag : $lf_tag];
	if (is_para($buf->{words})) {
	print $out fmt($buf);
	$buf = mk_buf;
	}
	}
	}