Skip to content

Instantly share code, notes, and snippets.

@jeromerobert
Last active September 1, 2023 09:05
Show Gist options
  • Save jeromerobert/3996eca3acd12e4c3d40 to your computer and use it in GitHub Desktop.
Save jeromerobert/3996eca3acd12e4c3d40 to your computer and use it in GitHub Desktop.
Pandoc filter to create PDF files from SVG
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Str, Para, Image
# Per pandoc output format: (inkscape export flag, extension of the converted
# file). Formats that are not listed are left alone by the filter.
fmt_to_option = dict(
    # PDF for the latex and beamer writers
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # use PNG because EMF and WMF break transparency
    docx=("--export-png", "png"),
    # because of IE
    html=("--export-png", "png"),
)
def svg_to_any(key, value, fmt, meta):
    """Pandoc filter action: replace an SVG image by a converted copy.

    For every ``Image`` element whose source is guessed to be SVG, and whose
    output format has an entry in ``fmt_to_option``, inkscape is run to
    produce a sibling file with the target extension. The conversion is
    skipped when that file already exists and is at least as recent as the
    SVG.

    Args:
        key: type name of the pandoc element being visited.
        value: element payload; ``[alt, [src, title]]`` before pandoc 1.16,
            ``[attrs, alt, [src, title]]`` afterwards.
        fmt: pandoc output format (looked up in ``fmt_to_option``).
        meta: document metadata (unused).

    Returns:
        A new ``Image`` element pointing at the converted file, or ``None``
        (which pandocfilters treats as "leave the element unchanged") when
        the element is not a convertible SVG image.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value

    mimet, _ = mimetypes.guess_type(src)
    if mimet != 'image/svg+xml':
        return None
    option = fmt_to_option.get(fmt)
    if not option:
        return None

    base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # src is not a readable local file (e.g. a remote URL): keep the
        # image as it is instead of aborting the whole filter run.
        sys.stderr.write("Cannot read %s, leaving image unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # no converted file yet: force the conversion
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout goes to stderr because stdout carries the
        # filter's JSON output.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    if attrs is not None:
        return Image(attrs, alt, [eps_name, title])
    return Image(alt, [eps_name, title])


if __name__ == "__main__":
    toJSONFilter(svg_to_any)
@RedX2501
Copy link

I had a problem with whitespace in the filename; the easiest fix for me was to remove it. Also, when using this script in a command chain nothing would happen, so I added the option to convert to PDF as a default action if the output format is unknown.

#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""

__author__ = "Jerome Robert"

import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Image

# Per pandoc output format: (inkscape export flag, extension of the converted
# file).
# TODO add emf export if fmt=="docx" ?
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # because of IE
    html=("--export-png", "png"),
)


def svg_to_any(key, value, fmt, meta):
    """Pandoc filter action: replace an SVG image by a converted copy.

    Output formats without an entry in ``fmt_to_option`` fall back to PDF.
    ``%20`` sequences are dropped from the name of the converted file and
    turned back into spaces in the source path handed to inkscape.

    Args:
        key: type name of the pandoc element being visited.
        value: element payload; ``[alt, [src, title]]`` before pandoc 1.16,
            ``[attrs, alt, [src, title]]`` afterwards.
        fmt: pandoc output format.
        meta: document metadata (unused).

    Returns:
        A new ``Image`` element pointing at the converted file, or ``None``
        (element left unchanged) when the element is not an SVG image.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16 an Image carries no attributes
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value

    mimet, _ = mimetypes.guess_type(src)
    if mimet != 'image/svg+xml':
        return None

    # Unknown output formats are converted to PDF by default.
    option = fmt_to_option.get(fmt, ("--export-pdf", "pdf"))
    base_name, _ = os.path.splitext(src)
    # The output name gets its "%20" removed, the source gets real spaces.
    eps_name = (base_name + "." + option[1]).replace("%20", "")
    src = src.replace("%20", " ")
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # no converted file yet: force the conversion
        mtime = -1
    if mtime < os.path.getmtime(src):
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        subprocess.call(cmd_line, stdout=sys.stderr.fileno())
    if attrs is not None:
        return Image(attrs, alt, [eps_name, title])
    return Image(alt, [eps_name, title])

# Script entry point: when run directly (as pandoc runs a filter), hand
# svg_to_any to pandocfilters' toJSONFilter.
if __name__ == "__main__":
    toJSONFilter(svg_to_any)

@Jmuccigr
Copy link

Thanks for doing this. Saved me today.

One thing: it's obvious, perhaps, but it bit me: you need to have inkscape available via the command line.

I used homebrew to install it. (I also already have the GUI version installed, so maybe a simple linking of that binary into my path would have worked. Hmmmm…)

Also, I've got this on my local system via the desktop app. Would someone mind updating the gist to this latest version that seems to work?

Does it not work with on-line images? I'm getting a file not found error for something.

@Jmuccigr
Copy link

Any thoughts on getting this to handle on-line images as well?

@n1zzo
Copy link

n1zzo commented Nov 13, 2016

Hi, on my updated Arch linux system the filter gives the following error:

Traceback (most recent call last):
  File "../pandoc-svgmk2.py", line 43, in <module>
    toJSONFilter(svg_to_any)
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 46, in toJSONFilter
    toJSONFilters([action])
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in toJSONFilters
    altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in <lambda>
    altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
KeyError: 0
pandoc: Error running filter ../pandoc-svgmk2.py
Filter returned error status 1

Did any of you encounter the same error, or does anyone have an idea how to fix it?
I am using RedX2501 version of pandoc-svg.py with pandoc 1.18.

@orangecms
Copy link

Yes, simply rewrite the PKGBUILD from AUR to use version 1.4.1 of the pandocfilters package - see https://pypi.python.org/pypi/pandocfilters for the download link to replace it.

@DancingQuanta
Copy link

What about pdf_tex, using the option --export-latex? This generates two files: a PDF and a TeX file. The TeX file, which has the extension .pdf_tex, needs to be included in the document with \input{}. It imports the PDF file and places the text on top of the image, which allows the image's text to be formatted by the LaTeX document.

@juji
Copy link

juji commented Jul 23, 2017

handle online image

#! /usr/bin/env python

"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""

__author__ = "Jerome Robert"

import mimetypes
import subprocess
import os
import sys
import urllib
import re
from pandocfilters import toJSONFilter, Str, Para, Image

# Per pandoc output format: (inkscape export flag, extension of the converted
# file). Formats that are not listed are left alone by the filter.
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # use PNG because EMF and WMF break transparency
    docx=("--export-png", "png"),
    # because of IE
    html=("--export-png", "png"),
)

def svg_to_any(key, value, fmt, meta):
    """Pandoc filter action: replace a local or online SVG image by a converted copy.

    ``http(s)://`` sources are downloaded into the current directory under a
    sanitised file name before conversion; local sources are converted next
    to the original file. The conversion is skipped when the converted file
    already exists and is at least as recent as the SVG.

    Args:
        key: type name of the pandoc element being visited.
        value: element payload; ``[alt, [src, title]]`` before pandoc 1.16,
            ``[attrs, alt, [src, title]]`` afterwards.
        fmt: pandoc output format (looked up in ``fmt_to_option``).
        meta: document metadata (unused).

    Returns:
        A new ``Image`` element pointing at the converted file, or ``None``
        (element left unchanged) when the element is not a convertible SVG
        image.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value

    is_remote = re.match(r'https?\://', src) is not None
    if is_remote:
        # Strip query string, fragment and trailing slash so that the file
        # extension can be seen by mimetypes.
        srcm = re.sub(r'\?.+', '', src)
        srcm = re.sub(r'\#.+', '', srcm)
        srcm = re.sub(r'/$', '', srcm)
    else:
        srcm = src

    mimet, _ = mimetypes.guess_type(srcm)
    if mimet != 'image/svg+xml':
        return None
    option = fmt_to_option.get(fmt)
    if not option:
        return None

    if is_remote:
        # Python 3 moved these helpers out of the top-level urllib module.
        try:
            from urllib.parse import unquote
            from urllib.request import urlretrieve
        except ImportError:  # Python 2
            from urllib import unquote, urlretrieve
        bsnm = os.path.basename(srcm)
        if not isinstance(bsnm, str):
            # Python 2 unicode: unquote a UTF-8 byte string as before
            bsnm = bsnm.encode('utf8')
        bsnm = unquote(bsnm)
        # keep only characters that are safe in a local file name
        bsnm = re.sub(r'[^a-zA-Z0-9\.]', '', bsnm)
        src, _ = urlretrieve(src, bsnm)
        base_name, _ = os.path.splitext(bsnm)
    else:
        # Local file: bsnm does not exist on this path, derive the output
        # name from src itself.
        base_name, _ = os.path.splitext(src)

    eps_name = base_name + "." + option[1]
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # no converted file yet: force the conversion
        mtime = -1
    if mtime < os.path.getmtime(src):
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        subprocess.call(cmd_line, stdout=sys.stderr.fileno())
    if attrs is not None:
        return Image(attrs, alt, [eps_name, title])
    return Image(alt, [eps_name, title])

# Script entry point: when run directly (as pandoc runs a filter), hand
# svg_to_any to pandocfilters' toJSONFilter.
if __name__ == "__main__":
  toJSONFilter(svg_to_any)

@jahomax
Copy link

jahomax commented Jun 5, 2019

An adjustment to @juji's reply: replace lines 47-51

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
           base_name,_ = os.path.splitext(bsnm)

with

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
               base_name,_ = os.path.splitext(bsnm)
           else:
               base_name, _ = os.path.splitext(src)

@whateverforever
Copy link

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
               base_name,_ = os.path.splitext(bsnm)
               eps_name = base_name + "." + option[1]
           else:
               base_name, _ = os.path.splitext(src)
               eps_name = os.path.realpath(base_name + "." + option[1])
               src = os.path.realpath(src)

One further adjustment. Inkscape crashes on macOS with `** (inkscape-bin:91102): WARNING **: Can't open file: image.svg (doesn't exist)`, caused by some weird sh wrapper around inkscape that sets a different working directory. Using os.path.realpath() fixes that.

@whateverforever
Copy link

whateverforever commented Jun 23, 2020

For the new Inkscape 1.0 on Catalina --export-filename has superseded --export-pdf

# Per pandoc output format: (inkscape export flag, extension of the converted
# file). Inkscape 1.0 replaced both --export-pdf and --export-png with
# --export-filename; the output type follows from the file extension.
fmt_to_option = {
    "latex": ("--export-filename", "pdf"),
    "beamer": ("--export-filename", "pdf"),
    # use PNG because EMF and WMF break transparency
    "docx": ("--export-filename", "png"),
    # because of IE
    "html": ("--export-filename", "png"),
}

@ghamerly
Copy link

--export-filename should now be used in place of both --export-pdf and --export-png.

# Per pandoc output format: (inkscape export flag, extension of the converted
# file). Every format uses the same flag; only the extension differs.
fmt_to_option = {
    out_format: ("--export-filename", extension)
    for out_format, extension in (
        ("latex", "pdf"),
        ("beamer", "pdf"),
        # use PNG because EMF and WMF break transparency
        ("docx", "png"),
        # because of IE
        ("html", "png"),
    )
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment