bpj/pandoc-collect-floats.pl

## pandoc-collect-floats.pl
#!/usr/bin/env perl

=pod

Pandoc filter which emulates the LaTeX endfloat package by extracting all
elements which would be LaTeX floats (figures and tables) from a
document and putting them in a div with the id "collected-figures" or
"collected-tables" respectively. You must mark the points in the
document where you want
the floats to go with a paragraph containing *only* the text
"FiguresHere" or "TablesHere" -- exactly as written here in CamelCase --
or you will lose the floats! If there are several paragraphs with the
sentinel texts only the one first found will be replaced with a div
containing the figures/tables.

Additionally a paragraph with the text "[Figure %d about here.]" or
"[Table %d about here.]" is inserted into the document where the
figure/table used to be, with "%d" being the number of figures/tables
found so far; thus it is not and cannot be guaranteed to be the same
number as LaTeX would have assigned!

Reference: <https://groups.google.com/d/topic/pandoc-discuss/jLUuYFcRDtk/discussion>

This filter requires a perl interpreter and the
JSON::MaybeXS and Data::Rmap modules to run.

Most operating systems other than Windows come with perl already installed.
If you are on Windows I recommend downloading and installing
Strawberry Perl: <http://strawberryperl.com>.

If/once you have perl installed run the following commands:

    cpan App::cpanminus

    cpanm JSON::MaybeXS Data::Rmap

Then run pandoc with the filter:

    pandoc -F ./pandoc-collect-floats.pl [OPTIONS] INPUTFILE

=cut

use utf8;        # so literals and identifiers can be in UTF-8
use strict;      # quote strings, declare variables
use warnings;    # not enabled by default, so switch them on explicitly

use JSON::MaybeXS qw[ decode_json encode_json ];
use Data::Rmap qw[ rmap_hash ];

# Pandoc calls a filter with the name of the output format as its first
# argument.  It is not used below, but shifting it off @ARGV keeps the
# <> operator from treating it as an input file name, so the document is
# read from STDIN.
my $format = shift @ARGV;

# Slurp the JSON-encoded document and decode it into a Perl structure.
my $json = do { local $/; <>; };
my $doc = decode_json( $json );

# The floats collected so far, plus one flag per kind recording whether
# its sentinel paragraph has already been replaced.
my %floats = (    #
    figures     => [],
    saw_figures => 0,
    tables      => [],
    saw_tables  => 0,
);

# Visit every hash in the document.  Hashes with both a "t" (type) and a
# "c" (contents) key are treated as elements; assigning to $_ replaces
# the visited element in place.
rmap_hash {
    return unless exists $_->{t} and exists $_->{c};
    my $elem = $_;
    if ( 'Para' eq $elem->{t} ) {
        # Only paragraphs consisting of exactly one inline are candidates.
        return unless 1 == @{ $elem->{c} };
        my $inline = $elem->{c}[0];
        if ( 'Image' eq $inline->{t} ) {
            # The last item of the image contents holds the title as its
            # second member; a title starting with "fig:" marks a figure.
            return unless $inline->{c}[-1][1] =~ /^fig\:/;
            push @{ $floats{figures} }, $elem;
            my $count = @{ $floats{figures} };
            $_ = +{
                t => 'Para',
                c => [ +{ t => 'Str', c => "[Figure $count about here.]" } ],
            };
        }
        elsif ( 'Str' eq $inline->{t} ) {
            return unless $inline->{c} =~ /\A(Figures|Tables)Here\z/;

            # 'figures' or 'tables': the key under which %floats stores
            # the collected elements.  (The div id gets a "collected-"
            # prefix, the hash key must not.)
            my $kind = lc $1;

            # Only the first sentinel of each kind is replaced; later
            # ones are left in the document untouched.
            return if $floats{"saw_$kind"}++;

            # The div holds a reference to the very array floats are
            # pushed onto, so floats found later in the document still
            # end up inside it.
            $_ = +{
                t => 'Div',
                c => [ [ "collected-$kind", [], [] ], $floats{$kind} ],
            };
        }
    }
    elsif ( 'Table' eq $elem->{t} ) {
        push @{ $floats{tables} }, $elem;
        my $count = @{ $floats{tables} };
        $_ = +{
            t => 'Para',
            c => [ +{ t => 'Str', c => "[Table $count about here.]" } ],
        };
    }
    return;
}
$doc;

print encode_json( $doc );


## pandoc-collect-floats.py
#!/usr/bin/env python

"""
Pandoc filter which emulates the LaTeX endfloat package by extracting all
elements which would be LaTeX floats (figures and tables) from a
document and putting them in a div with the id "collected-figures" or
"collected-tables" respectively. You must mark the points in the
document where you want
the floats to go with a paragraph containing *only* the text
"FiguresHere" or "TablesHere" -- exactly as written here in CamelCase --
or you will lose the floats! If there are several paragraphs with the
sentinel texts only the one first found will be replaced with a div
containing the figures/tables.

Additionally a paragraph with the text "[Figure %d about here.]" or
"[Table %d about here.]" is inserted into the document where the
figure/table used to be, with "%d" being the number of figures/tables
found so far; thus it is not and cannot be guaranteed to be the same
number as LaTeX would have assigned!

Reference: <https://groups.google.com/d/topic/pandoc-discuss/jLUuYFcRDtk/discussion>

This filter requires the pandocfilters module to be installed. You can
clone or download it from GitHub (with instructions for installing and
how to use filters): https://github.com/jgm/pandocfilters or install
from PyPI::

    pip install pandocfilters

If you have an earlier version installed you may need to do::

    pip install -U pandocfilters
"""

from pandocfilters import toJSONFilter, Div, Image, Para, Str, Table

# Filter-wide state: the figure and table elements collected so far, and
# one flag per kind that records whether its sentinel paragraph has
# already been replaced.
floats = dict(
    figures=[],
    saw_figures=None,
    tables=[],
    saw_tables=None,
)

def collect_floats(eltype, eldata, fmt, meta):
    """Filter action that moves figures and tables out of the text flow.

    * A paragraph whose only inline is an image with a title starting
      with ``fig:`` is stored in ``floats['figures']`` and replaced by
      a "[Figure N about here.]" paragraph.
    * A table is stored in ``floats['tables']`` and replaced by a
      "[Table N about here.]" paragraph.
    * The first paragraph consisting only of ``FiguresHere`` (or
      ``TablesHere``) is replaced by a div with the id
      ``collected-figures`` (``collected-tables``) holding the stored
      elements; further sentinels of the same kind are left alone.

    The walker applies this action again to whatever it returns, so the
    stored floats are visited a second time once they sit inside the
    div.  Their contents are therefore remembered by identity and
    skipped on that second visit; without this, every float would be
    re-collected while the list holding it is being iterated.

    NOTE(review): floats met after the sentinel has been processed are
    presumably missing from the emitted div, because the walker copies
    the returned node -- confirm against pandocfilters' ``walk``.

    Args:
        eltype: element type name, e.g. ``'Para'`` or ``'Table'``.
        eldata: contents of the element.
        fmt: output format name (unused).
        meta: document metadata (unused).

    Returns:
        A replacement element, a list with the collecting div, or
        ``None`` to leave the element unchanged.
    """
    # id()s of the contents of every stored float.  The stored elements
    # stay referenced from ``floats``, so these ids cannot be reused.
    collected = floats.setdefault('_collected_ids', set())
    if eltype == 'Para':
        if len(eldata) != 1:
            return None
        elem = eldata[0]
        if elem['t'] == 'Image':
            if id(eldata) in collected:
                return None  # already stored; second visit via the div
            if elem['c'][-1][1].startswith('fig:'):  # title
                figure = Para(eldata)
                floats['figures'].append(figure)
                collected.add(id(figure['c']))
                filler = "[Figure %d about here.]" % len(floats['figures'])
                return Para([Str(filler)])
        elif elem['t'] == 'Str':
            key = {'FiguresHere': 'figures',
                   'TablesHere': 'tables'}.get(elem['c'])
            if key is None or floats['saw_' + key]:
                return None
            floats['saw_' + key] = True
            return [Div(['collected-' + key, [], []], floats[key])]
    elif eltype == 'Table':
        if id(eldata) in collected:
            return None  # already stored; second visit via the div
        table = Table(*eldata)
        floats['tables'].append(table)
        collected.add(id(table['c']))
        filler = "[Table %d about here.]" % len(floats['tables'])
        return Para([Str(filler)])
    return None

# When executed as a script, hand collect_floats to pandocfilters'
# toJSONFilter so it is used as the filter action.
if __name__ == "__main__":
    toJSONFilter(collect_floats)
	#!/usr/bin/env perl

	=pod

	Pandoc filter which emulates the LaTeX endfloat package by extracting all
	elements which would be LaTeX floats (figures and tables) from a
	document and putting them in a div with the id "collected-figures" or
	"collected-tables" respectively. You must mark the points in the
	document where you want
	the floats to go with a paragraph containing only the text
	"FiguresHere" or "TablesHere" -- exactly as written here in CamelCase --
	or you will lose the floats! If there are several paragraphs with the
	sentinel texts only the one first found will be replaced with a div
	containing the figures/tables.

	Additionally a paragraph with the text "[Figure %d about here.]" or
	"[Table %d about here.]" is inserted into the document where the
	figure/table used to be, with "%d" being the number of figures/tables
	found so far; thus it is not and cannot be guaranteed to be the same
	number as LaTeX would have assigned!

	Reference: <https://groups.google.com/d/topic/pandoc-discuss/jLUuYFcRDtk/discussion>

	This filter requires a perl interpreter and the
	JSON::MaybeXS and Data::Rmap modules to run.

	Most operating systems other than Windows come with perl already installed.
	If you are on Windows I recommend downloading and installing
	Strawberry Perl: <http://strawberryperl.com>.

	If/once you have perl installed run the following commands:

	cpan App::cpanminus

	cpanm JSON::MaybeXS Data::Rmap

	Then run pandoc with the filter:

	pandoc -F ./pandoc-collect-floats.pl [OPTIONS] INPUTFILE

	=cut

	use utf8; # so literals and identifiers can be in UTF-8
	use strict; # quote strings, declare variables
	use warnings; # not enabled by default, so switch them on explicitly

	use JSON::MaybeXS qw[ decode_json encode_json ];
	use Data::Rmap qw[ rmap_hash ];

	# Pandoc calls a filter with the name of the output format as its first
	# argument.  It is not used below, but shifting it off @ARGV keeps the
	# <> operator from treating it as an input file name, so the document
	# is read from STDIN.
	my $format = shift @ARGV;

	# Slurp the JSON-encoded document and decode it into a Perl structure.
	my $json = do { local $/; <>; };
	my $doc = decode_json( $json );

	# The floats collected so far, plus one flag per kind recording whether
	# its sentinel paragraph has already been replaced.
	my %floats = ( #
	    figures     => [],
	    saw_figures => 0,
	    tables      => [],
	    saw_tables  => 0,
	);

	# Visit every hash in the document.  Hashes with both a "t" (type) and
	# a "c" (contents) key are treated as elements; assigning to $_
	# replaces the visited element in place.
	rmap_hash {
	    return unless exists $_->{t} and exists $_->{c};
	    my $elem = $_;
	    if ( 'Para' eq $elem->{t} ) {
	        # Only paragraphs consisting of exactly one inline are candidates.
	        return unless 1 == @{ $elem->{c} };
	        my $inline = $elem->{c}[0];
	        if ( 'Image' eq $inline->{t} ) {
	            # The last item of the image contents holds the title as its
	            # second member; a title starting with "fig:" marks a figure.
	            return unless $inline->{c}[-1][1] =~ /^fig\:/;
	            push @{ $floats{figures} }, $elem;
	            my $count = @{ $floats{figures} };
	            $_ = +{
	                t => 'Para',
	                c => [ +{ t => 'Str', c => "[Figure $count about here.]" } ],
	            };
	        }
	        elsif ( 'Str' eq $inline->{t} ) {
	            # The alternation bar must be unescaped: an escaped one
	            # would only match a literal "|" and never a sentinel.
	            return unless $inline->{c} =~ /\A(Figures|Tables)Here\z/;

	            # 'figures' or 'tables': the key under which %floats stores
	            # the collected elements.  (The div id gets a "collected-"
	            # prefix, the hash key must not.)
	            my $kind = lc $1;

	            # Only the first sentinel of each kind is replaced; later
	            # ones are left in the document untouched.
	            return if $floats{"saw_$kind"}++;

	            # The div holds a reference to the very array floats are
	            # pushed onto, so floats found later in the document still
	            # end up inside it.
	            $_ = +{
	                t => 'Div',
	                c => [ [ "collected-$kind", [], [] ], $floats{$kind} ],
	            };
	        }
	    }
	    elsif ( 'Table' eq $elem->{t} ) {
	        push @{ $floats{tables} }, $elem;
	        my $count = @{ $floats{tables} };
	        $_ = +{
	            t => 'Para',
	            c => [ +{ t => 'Str', c => "[Table $count about here.]" } ],
	        };
	    }
	    return;
	}
	$doc;

	print encode_json( $doc );
	#!/usr/bin/env python

	"""
	Pandoc filter which emulates the LaTeX endfloat package by extracting all
	elements which would be LaTeX floats (figures and tables) from a
	document and putting them in a div with the id "collected-figures" or
	"collected-tables" respectively. You must mark the points in the
	document where you want
	the floats to go with a paragraph containing only the text
	"FiguresHere" or "TablesHere" -- exactly as written here in CamelCase --
	or you will lose the floats! If there are several paragraphs with the
	sentinel texts only the one first found will be replaced with a div
	containing the figures/tables.

	Additionally a paragraph with the text "[Figure %d about here.]" or
	"[Table %d about here.]" is inserted into the document where the
	figure/table used to be, with "%d" being the number of figures/tables
	found so far; thus it is not and cannot be guaranteed to be the same
	number as LaTeX would have assigned!

	Reference: <https://groups.google.com/d/topic/pandoc-discuss/jLUuYFcRDtk/discussion>

	This filter requires the pandocfilters module to be installed. You can
	clone or download it from GitHub (with instructions for installing and
	how to use filters): https://github.com/jgm/pandocfilters or install
	from PyPI::

	pip install pandocfilters

	If you have an earlier version installed you may need to do::

	pip install -U pandocfilters
	"""

	from pandocfilters import toJSONFilter, Div, Image, Para, Str, Table

	# Filter-wide state: the figure and table elements collected so far, and
	# one flag per kind that records whether its sentinel paragraph has
	# already been replaced.
	floats = dict(
	    figures=[],
	    saw_figures=None,
	    tables=[],
	    saw_tables=None,
	)

	def collect_floats(eltype, eldata, fmt, meta):
	    """Filter action that moves figures and tables out of the text flow.

	    * A paragraph whose only inline is an image with a title starting
	      with ``fig:`` is stored in ``floats['figures']`` and replaced by
	      a "[Figure N about here.]" paragraph.
	    * A table is stored in ``floats['tables']`` and replaced by a
	      "[Table N about here.]" paragraph.
	    * The first paragraph consisting only of ``FiguresHere`` (or
	      ``TablesHere``) is replaced by a div with the id
	      ``collected-figures`` (``collected-tables``) holding the stored
	      elements; further sentinels of the same kind are left alone.

	    The walker applies this action again to whatever it returns, so the
	    stored floats are visited a second time once they sit inside the
	    div.  Their contents are therefore remembered by identity and
	    skipped on that second visit; without this, every float would be
	    re-collected while the list holding it is being iterated.

	    NOTE(review): floats met after the sentinel has been processed are
	    presumably missing from the emitted div, because the walker copies
	    the returned node -- confirm against pandocfilters' ``walk``.

	    Args:
	        eltype: element type name, e.g. ``'Para'`` or ``'Table'``.
	        eldata: contents of the element.
	        fmt: output format name (unused).
	        meta: document metadata (unused).

	    Returns:
	        A replacement element, a list with the collecting div, or
	        ``None`` to leave the element unchanged.
	    """
	    # id()s of the contents of every stored float.  The stored elements
	    # stay referenced from ``floats``, so these ids cannot be reused.
	    collected = floats.setdefault('_collected_ids', set())
	    if eltype == 'Para':
	        if len(eldata) != 1:
	            return None
	        elem = eldata[0]
	        if elem['t'] == 'Image':
	            if id(eldata) in collected:
	                return None  # already stored; second visit via the div
	            if elem['c'][-1][1].startswith('fig:'):  # title
	                figure = Para(eldata)
	                floats['figures'].append(figure)
	                collected.add(id(figure['c']))
	                filler = "[Figure %d about here.]" % len(floats['figures'])
	                return Para([Str(filler)])
	        elif elem['t'] == 'Str':
	            key = {'FiguresHere': 'figures',
	                   'TablesHere': 'tables'}.get(elem['c'])
	            if key is None or floats['saw_' + key]:
	                return None
	            floats['saw_' + key] = True
	            return [Div(['collected-' + key, [], []], floats[key])]
	    elif eltype == 'Table':
	        if id(eldata) in collected:
	            return None  # already stored; second visit via the div
	        table = Table(*eldata)
	        floats['tables'].append(table)
	        collected.add(id(table['c']))
	        filler = "[Table %d about here.]" % len(floats['tables'])
	        return Para([Str(filler)])
	    return None

	# When executed as a script, hand collect_floats to pandocfilters'
	# toJSONFilter so it is used as the filter action.
	if __name__ == "__main__":
	    toJSONFilter(collect_floats)