Skip to content

Instantly share code, notes, and snippets.

@bpj
Created March 27, 2015 09:54
Show Gist options
  • Save bpj/2a5639a0005c0124a2d4 to your computer and use it in GitHub Desktop.
Save bpj/2a5639a0005c0124a2d4 to your computer and use it in GitHub Desktop.
A pandoc filter to format poetry nicely for HTML
#!/usr/bin/env perl
#----------------------------------------------------------------------
# pandoc-poetry.pl
# ================
#
# A pandoc filter to format poetry nicely for HTML.
#
# See <https://groups.google.com/d/msg/pandoc-discuss/_JnTJnsSK3k/SkM9tjfYyg0J>
#
# USAGE
# -----
#
# pandoc -F pandoc-poetry.pl [OPTIONS] verse.md -o verse.html
#
# INPUT FORMAT
# ------------
#
# ````pandoc-markdown
# <div class="verse">
#
# | Line of poetry goes here
# | Second line here
# | And so on, throughout the poem,
#
# even over stanza breaks.
#
# </div>
# ````
# The contents of paragraphs inside divs with class "verse"
# are 'split' at hard line breaks and the pieces are wrapped
# in spans with class "line". The paragraph is then itself
# wrapped in a div with class "stanza", which allows
# formatting the layout of verse with CSS
#
# XXX:
# Multi-line stanzas should be line-blocks,
# but single-line stanzas must be 'normal' paras,
# because Pandoc doesn't recognise single-line line-blocks
# (arguably a bug in Pandoc!).
#
# OUTPUT HTML
# -----------
#
# ````html
# <div class="verse">
# <div class="stanza">
# <p>
# <span class="line">Line of poetry goes here</span>
# <span class="line">Second line here</span>
# <span class="line">And so on, throughout the poem,</span>
# </p>
# </div>
# <div class="stanza">
# <p>
# <span class="line">even over stanza breaks.</span>
# </p>
# </div>
# </div>
# ````
# Not *quite* as the OP wished, because
# you cannot put classes on `<p>` elements with Pandoc.
# Use a selector `div.stanza p` to style stanzas with CSS!
#
# Copyright 2015- Benct Philip Jonsson.
#
# This script is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#----------------------------------------------------------------------
# use 5.014;
use strict;
use warnings; # FATAL => 'all';
use utf8; # No UTF-8 I/O with JSON!
use autodie 2.12;
# no indirect;
# no autovivification; # Don't pollute the AST!
# use Getopt::Long qw[ GetOptionsFromArray :config no_ignore_case ];
# The following two modules must be installed from CPAN;
# see <http://www.cpan.org/modules/INSTALL.html>
use JSON::MaybeXS qw[ decode_json encode_json ]; # Choose best available implementation.
use Data::Rmap qw[ rmap_hash ]; # Data structure traversal support.
# HELPER FUNCTIONS # {{{1}}}
# is_elem( $tag, $elem )
#
# Return true if $elem is an unblessed hash ref whose type tag (key 't')
# is defined and string-equal to $tag; return false otherwise.
#
# The contents key 'c' is deliberately NOT required: pandoc >= 1.18
# serialises constructors without arguments (LineBreak, Space, ...)
# as {"t":"LineBreak"} with no "c" key, so demanding 'c' would make
# such elements impossible to detect.  Callers which need the contents
# must check $elem->{c} themselves.
sub is_elem {
    my ( $tag, $elem ) = @_;
    return !!0 unless 'HASH' eq ref $elem;
    # 'defined' rather than 'exists': avoids an "uninitialized" warning
    # in the comparison below if the key is present but undef.
    return !!0 unless defined $elem->{t};
    return !!0 unless $tag eq $elem->{t};
    return !!1;
} ## end sub is_elem
# _mk_elem( $tag, $payload )
#
# Build a fresh hash ref in the shape of a Pandoc AST element:
# the type tag goes under key 't', the contents under key 'c'.
sub _mk_elem { # {{{2}}}
    my ( $tag, $payload ) = @_;
    my %node = ( t => $tag, c => $payload );
    return \%node;
}
# NL: a single shared Str element holding a literal newline.
use constant { NL => _mk_elem( 'Str', "\n" ) };
# _mk_stanza( $inlines )
#
# Wrap the array ref of inlines in a Para, and that Para in a Div
# with an empty id, the single class "stanza" and no key-value attributes.
sub _mk_stanza {
    my ($inlines) = @_;
    my $para = _mk_elem( Para => $inlines );
    my $attr = [ "", ['stanza'], [] ];
    return _mk_elem( Div => [ $attr, [$para] ] );
}
# _mk_line( $inlines )
#
# Return a two-element list: the NL element followed by a Span with
# the single class "line" whose contents are the given array ref.
sub _mk_line {
    my ($inlines) = @_;
    my $span = _mk_elem( Span => [ [ "", ['line'], [] ], $inlines ] );
    return ( NL, $span );
}
# Traverse document: # {{{1}}}
# Pandoc invokes a filter with the target format as its first argument.
# It must be removed from @ARGV, otherwise <> would try to open a file
# with that name instead of reading the JSON document from STDIN.
my $to_format = shift @ARGV;
my $doc = decode_json do { local $/; <>; };
# Change elements in-place: # {{{2}}}
rmap_hash {
    my $div = $_;    # Keep a named reference; grep below re-aliases $_
    return unless is_elem( Div => $div );
    # A Div's contents are [ attr, blocks ]; attr is [ id, classes, kvs ].
    my $contents = $div->{c};
    return unless 'ARRAY' eq ref $contents and @{$contents} >= 2;
    my $attr = $contents->[-2];
    return unless 'ARRAY' eq ref $attr and 'ARRAY' eq ref $attr->[1];
    # Accept "verse" anywhere in the class list, and never compare
    # against undef when the Div has no classes at all.
    return unless grep { defined($_) && 'verse' eq $_ } @{ $attr->[1] };
    verse($div);
    return;
} $doc;
# verse( $verse )
#
# $verse is a Div element (hash ref) with class "verse".  Every Para
# found directly among its child blocks is replaced, in place, by a
# Div with class "stanza" containing a Para in which each run of
# inlines between hard line breaks is wrapped in a Span with class
# "line".  Child blocks which are not Paras are left untouched.
# Returns nothing.
sub verse {
    my ($verse) = @_;    # A Div with class "verse"
    my $blocks = $verse->{c}[-1];    # The Div's list of child blocks
    return unless 'ARRAY' eq ref $blocks;
    BLOCK:
    for my $stanza ( @{$blocks} ) {    # $stanza aliases the array slot
        # Skip anything but a Para and carry on with the next block;
        # leaving the sub here would abandon all following stanzas.
        next BLOCK unless is_elem( Para => $stanza );
        my @lines = ( [] );    # A list of lists (lines) of inlines
        for my $inline ( @{ $stanza->{c} } ) {
            if ( is_elem( LineBreak => $inline ) ) {
                # Discard the LineBreak and start a new line
                push @lines, [];
            }
            else {
                # Add a non-LineBreak to the current line
                push @{ $lines[-1] }, $inline;
            }
        }
        # Turn each non-empty line into a Span with class "line"
        # preceded by a literal newline, then add a final newline.
        my @contents = map { _mk_line($_) } grep { @{$_} } @lines;
        push @contents, NL;
        # Assigning through the loop alias replaces the original Para
        # in the parent's contents with the new stanza Div.
        $stanza = _mk_stanza( \@contents );
    }
    return;
}
# Serialise the (modified) document back to JSON on standard output.
print STDOUT encode_json($doc);
# <section class="verse">
# <p class="stanza">
# <span class="line">Line of poetry goes here</span>
# <span class="line">Second line here</span>
# <span class="line">And so on, throughout the poem,</span>
# </p>
# <p class="stanza">
# <span class="line">even over stanza breaks.</span>
# </p>
# </section>
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment