Created September 17, 2014 17:10
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
no warnings qw[ uninitialized numeric ];
# Fake id and/or class attributes in or at the end of the title string of pandoc image links.
# Gets wrapped in a div/span with the id/class(es) as appropriate.
# In LaTeX pandoc 1.13.1 will insert a \hyperdef with the id as label.
# Thus you can link to it as you wish and it will work in HTML and LaTeX alike!
# $ pandoc -F ./ -w html
# ![caption](image "title {#fig:ref}")
# ^D
# <div id="fig:ref">
# <div class="figure">
# <img src="image" title="title" alt="caption" /><p class="caption">caption</p>
# </div>
# </div>
# $ pandoc -F ./ -w latex
# ![caption](image "title {#fig:ref}")
# ^D
# \hyperdef{}{fig:ref}{}
# \begin{figure}[htbp]
# \centering
# \includegraphics{image}
# \caption{caption}
# \end{figure}
# $ pandoc -F ./ -w html
# ![caption](image "title {#id .class1 .class2}")
# ^D
# <div id="id" class="class1 class2">
# <div class="figure">
# <img src="image" title="title" alt="caption" /><p class="caption">caption</p>
# </div>
# </div>
# $ pandoc -F ./ -w html
# Text ![alttext](image "title {#id .class1 .class2 }") and more text
# ^D
# <p>Text <span id="id" class="class1 class2"><img src="image" title="title" alt="alttext" /></span> and more text</p>
# $ pandoc -F ./ -w html
# ![caption](img "{#id}")
# ^D
# <div id="id">
# <div class="figure">
# <img src="img" alt="caption" /><p class="caption">caption</p>
# </div>
# </div>
use utf8; # No UTF-8 I/O layers: decode_json/encode_json work on UTF-8 bytes themselves!
use autodie 2.12;
no indirect;
no autovivification; # Don't pollute the AST!
# use Getopt::Long qw[ GetOptionsFromArray :config no_ignore_case ];
use JSON qw[ decode_json encode_json ];
use Data::Rmap qw[ rmap_hash cut ]; # Data structure traversal support.
# Matches a trailing attribute block such as "{#id .class1 .class2}" at the
# end of an image title, together with any whitespace in front of it.
#   $1 - the id (without the leading '#'), undef when no id was given
#   $2 - the raw class list including its leading whitespace and dots
#        (e.g. " .class1 .class2"), undef when no classes were given
# The braces are escaped because an unescaped literal '{' is deprecated or
# rejected by newer perls, and warnings are fatal in this script.
my $attr_re = qr[
    \s*             # optional whitespace
    \{              # followed by an opening brace
    (?:\#(\S+))?    # 1 an optional id
    (               # 2 optionally followed by
        \s+         #   whitespace
        \..+        #   and one or more classes
    )?
    \}              # then a closing brace
    \z              # and the end of string
]x;
# Pandoc hands the target format to a filter as its first argument; take it
# off @ARGV so the diamond operator below does not treat it as a file name.
my $to_format = shift @ARGV;
# Slurp the complete input in a single read and parse the JSON document.
my $doc = do {
    local $/;
    my $json = <>;
    decode_json $json;
};
# Change elements in-place.
#
# Visit every hash in the document. An Image whose title ends in an attribute
# block ("{#id .class1 .class2}") gets that block stripped from the title and
# is wrapped in a container carrying the id and classes:
#   * a Para consisting of a single 'fig:'-flagged Image becomes a Div
#     around the whole Para;
#   * any other Image becomes a Span around the Image itself.
rmap_hash {
    my ( $img, $para, @classes );
    if ( 'Para' eq $_->{t} ) {
        $para = $_;
        return unless 1 == @{ $para->{c} };
        return unless 'Image' eq $para->{c}[0]{t};
        $img = $para->{c}[0];
        return unless $img->{c}[1][1] =~ /\Afig:/; # The title has a 'fig:' flag already!
    }
    elsif ( 'Image' eq $_->{t} ) {
        $img = $_;
    }
    else { return }
    return unless $img->{c}[1][1] =~ s/$attr_re//; # title contains attributes
    my $id    = $1 // q{}; # Can't be undef for Pandoc's sake (would encode as JSON null)!
    my $class = $2;
    return unless length $id or length $class;
    if ( $class ) {
        $class =~ s/\s+\z//; # Trim trailing whitespace
        # Splitting on "whitespace + dot" leaves an empty leading field; drop it.
        @classes = grep { length $_ } split /\s+\./, $class;
    }
    my $tag = $para ? 'Div' : 'Span';
    # Attribute triple is [ id, classes, key-value pairs ], followed by the wrapped content.
    my $container = +{ t => $tag, c => [ [ $id, \@classes, [] ], [ $para || $img ] ] };
    $_ = $container; # Assigning to $_ replaces the visited node in the document.
} $doc;
# Emit the modified document as JSON on standard output.
print {*STDOUT} encode_json $doc;
bpj commented Sep 17, 2014

Change line #96 from

my $id = $1;

to

my $id = $1 || q{}; # Can't be undef for Pandoc's sake!

or pandoc will complain when there is no id!

