Skip to content

Instantly share code, notes, and snippets.

@akovbovich

akovbovich/myfmt Secret

Created January 29, 2014 11:27
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akovbovich/a6b6de7ce540b9424f37 to your computer and use it in GitHub Desktop.
Save akovbovich/a6b6de7ce540b9424f37 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
##############################################################################
# #
# run: perl myfmt input-file output-file #
# #
# (c) Andrey Kovbovich < akovbovich+tolstoy < at > gmail.com >, 2014 #
# #
##############################################################################
use strict;
use warnings;
use List::Util 'reduce';
# --------------------------------- settings ---------------------------------
my $N = 80; # line width in columns
my $pad = 4; # new paragraph padding
my $N2 = $N/2; # header max length
die "invalid N:$N, expected [20-120]\n"
unless 20 <= $N and $N <= 120;
die "invalid padding\n"
unless $pad < $N;
die <<"" unless @ARGV == 2;
usage: perl $0 input-file output-file\n
# ----------------------------------- defs -----------------------------------
my ($tag, $word, $len) =
( 0, 1, 2);
my ($word_tag, $lf_tag, $eof_tag) =
( 0, 1, 2);
# ----------------------------------- aux ------------------------------------
our ($a, $b);
sub justify {
my ($ofs, @line) = @_;
my $nwords = scalar @line;
my $nchars = reduce { $a + $b->[$len] } ($nwords - 1) + $ofs, @line;
my $nspace = $N - $nchars;
my @padmap = (0, map { 1 } 1 .. $nwords - 1);
for (my $n = 1; $nwords > 1 and $n <= $nspace; $n++) {
$padmap[-(1 + ($n - 1) % ($nwords - 1))] ++;
}
my $s = '';
for (my $i = 0; $i < $nwords; $i++) {
$s .= ' ' x $padmap[$i] . $line[$i][$word];
}
$s . "\n";
}
sub cat {
my @words = @_;
join ' ', map { $_->[$word] } @words;
}
sub fmt3 {
my ($s, $ofs, @words) = @_;
for (my ($ncol, @line) = ($N-$ofs,); $ncol >= 0;) {
$ncol-- if @line;
if (my $elt = shift @words) {
if ($ncol >= $elt->[$len]) {
push @line, $elt;
$ncol -= $elt->[$len];
}
else {
unshift @words, $elt;
$s .= justify($ofs, @line);
@_ = ($s, 0, @words); goto &fmt3;
}
}
else {
$s .= cat(@line) . "\n";
last;
}
}
$s;
}
sub fmt {
my $buf = shift;
return '' unless
my @words = grep { $_->[$tag] == $word_tag } @{$buf->{words}};
my $s = ' ' x $pad;
if ($buf->{nchars} + $buf->{nspace} + $pad < $N2) {
return
$s . cat(@words) . "\n";
}
fmt3($s, length $s, @words);
}
sub is_para {
my $words = shift;
return 1 if $$words[-1][$tag] == $eof_tag;
if (@$words >= 2) {
return 1 if
$$words[-2][$tag] == $word_tag and
$$words[-1][$tag] == $lf_tag and
$$words[-2][$word] =~ /[\.\!\?]$/;
}
0;
}
sub mk_buf {
+{
nchars => 0,
nspace => 0,
words => [],
};
}
# ----------------------------------- main ----------------------------------
open my $in, '<:utf8', $ARGV[0] or die "cannot open $ARGV[0]: $!\n";
open my $out, '>:utf8', $ARGV[1] or die "cannot open $ARGV[1]: $!\n";
my $buf = mk_buf;
while (<$in>) {
chomp;
unless (my @tokens = split /\s+/) {
print $out fmt($buf), "\n";
$buf = mk_buf;
}
else {
for (@tokens) {
$buf->{nchars} += length;
$buf->{nspace} += 1 if @{$buf->{words}};
push @{$buf->{words}}, [$word_tag, $_, length];
}
push @{$buf->{words}}, [eof() ? $eof_tag : $lf_tag];
if (is_para($buf->{words})) {
print $out fmt($buf);
$buf = mk_buf;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment