-
-
Save akovbovich/a6b6de7ce540b9424f37 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
############################################################################## | |
# # | |
# run: perl myfmt input-file output-file # | |
# # | |
# (c) Andrey Kovbovich < akovbovich+tolstoy < at > gmail.com >, 2014 # | |
# # | |
############################################################################## | |
use strict; | |
use warnings; | |
use List::Util 'reduce'; | |
# --------------------------------- settings --------------------------------- | |
my $N = 80; # line width in columns | |
my $pad = 4; # new paragraph padding | |
my $N2 = $N/2; # header max length | |
die "invalid N:$N, expected [20-120]\n" | |
unless 20 <= $N and $N <= 120; | |
die "invalid padding\n" | |
unless $pad < $N; | |
die <<"" unless @ARGV == 2; | |
usage: perl $0 input-file output-file\n | |
# ----------------------------------- defs ----------------------------------- | |
my ($tag, $word, $len) = | |
( 0, 1, 2); | |
my ($word_tag, $lf_tag, $eof_tag) = | |
( 0, 1, 2); | |
# ----------------------------------- aux ------------------------------------ | |
our ($a, $b); | |
sub justify { | |
my ($ofs, @line) = @_; | |
my $nwords = scalar @line; | |
my $nchars = reduce { $a + $b->[$len] } ($nwords - 1) + $ofs, @line; | |
my $nspace = $N - $nchars; | |
my @padmap = (0, map { 1 } 1 .. $nwords - 1); | |
for (my $n = 1; $nwords > 1 and $n <= $nspace; $n++) { | |
$padmap[-(1 + ($n - 1) % ($nwords - 1))] ++; | |
} | |
my $s = ''; | |
for (my $i = 0; $i < $nwords; $i++) { | |
$s .= ' ' x $padmap[$i] . $line[$i][$word]; | |
} | |
$s . "\n"; | |
} | |
sub cat { | |
my @words = @_; | |
join ' ', map { $_->[$word] } @words; | |
} | |
sub fmt3 { | |
my ($s, $ofs, @words) = @_; | |
for (my ($ncol, @line) = ($N-$ofs,); $ncol >= 0;) { | |
$ncol-- if @line; | |
if (my $elt = shift @words) { | |
if ($ncol >= $elt->[$len]) { | |
push @line, $elt; | |
$ncol -= $elt->[$len]; | |
} | |
else { | |
unshift @words, $elt; | |
$s .= justify($ofs, @line); | |
@_ = ($s, 0, @words); goto &fmt3; | |
} | |
} | |
else { | |
$s .= cat(@line) . "\n"; | |
last; | |
} | |
} | |
$s; | |
} | |
sub fmt { | |
my $buf = shift; | |
return '' unless | |
my @words = grep { $_->[$tag] == $word_tag } @{$buf->{words}}; | |
my $s = ' ' x $pad; | |
if ($buf->{nchars} + $buf->{nspace} + $pad < $N2) { | |
return | |
$s . cat(@words) . "\n"; | |
} | |
fmt3($s, length $s, @words); | |
} | |
sub is_para { | |
my $words = shift; | |
return 1 if $$words[-1][$tag] == $eof_tag; | |
if (@$words >= 2) { | |
return 1 if | |
$$words[-2][$tag] == $word_tag and | |
$$words[-1][$tag] == $lf_tag and | |
$$words[-2][$word] =~ /[\.\!\?]$/; | |
} | |
0; | |
} | |
sub mk_buf { | |
+{ | |
nchars => 0, | |
nspace => 0, | |
words => [], | |
}; | |
} | |
# ----------------------------------- main ---------------------------------- | |
open my $in, '<:utf8', $ARGV[0] or die "cannot open $ARGV[0]: $!\n"; | |
open my $out, '>:utf8', $ARGV[1] or die "cannot open $ARGV[1]: $!\n"; | |
my $buf = mk_buf; | |
while (<$in>) { | |
chomp; | |
unless (my @tokens = split /\s+/) { | |
print $out fmt($buf), "\n"; | |
$buf = mk_buf; | |
} | |
else { | |
for (@tokens) { | |
$buf->{nchars} += length; | |
$buf->{nspace} += 1 if @{$buf->{words}}; | |
push @{$buf->{words}}, [$word_tag, $_, length]; | |
} | |
push @{$buf->{words}}, [eof() ? $eof_tag : $lf_tag]; | |
if (is_para($buf->{words})) { | |
print $out fmt($buf); | |
$buf = mk_buf; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment