Skip to content

Instantly share code, notes, and snippets.

@jeromerobert
Last active September 1, 2023 09:05
Show Gist options
  • Save jeromerobert/3996eca3acd12e4c3d40 to your computer and use it in GitHub Desktop.
Save jeromerobert/3996eca3acd12e4c3d40 to your computer and use it in GitHub Desktop.
Pandoc filter to create PDF files from SVG
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Str, Para, Image
# Per output format: (Inkscape export switch, extension of the exported file).
# LaTeX-based outputs get a PDF.
fmt_to_option = dict.fromkeys(("latex", "beamer"), ("--export-pdf", "pdf"))
# docx: use PNG because EMF and WMF break transparency.
# html: use PNG because of IE.
fmt_to_option.update(
    dict.fromkeys(("docx", "html"), ("--export-png", "png"))
)
def svg_to_any(key, value, fmt, meta):
    """Pandoc filter action replacing SVG images by a converted file.

    When ``key`` is ``'Image'``, the image source is guessed to be SVG and
    ``fmt`` has an entry in ``fmt_to_option``, Inkscape is run to export the
    SVG next to the original (same base name, extension taken from
    ``fmt_to_option``). The export is skipped when the exported file is
    already at least as recent as the SVG.

    Args:
        key: element type name handed over by pandocfilters.
        value: element payload; ``[alt, [src, title]]`` before pandoc 1.16,
            ``[attrs, alt, [src, title]]`` afterwards.
        fmt: pandoc output format name, used as key into ``fmt_to_option``.
        meta: document metadata (unused).

    Returns:
        An ``Image`` element pointing at the exported file, or ``None``
        (which pandocfilters interprets as "keep the element as it is")
        when the element is not a convertible local SVG image.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value
    mimet, _ = mimetypes.guess_type(src)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    export_flag, extension = option
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # The source cannot be stat'ed (e.g. a URL or a missing file):
        # keep the original element rather than aborting the whole filter.
        sys.stderr.write("Cannot access %s, image left unchanged\n" % src)
        return None
    base_name, _ = os.path.splitext(src)
    out_name = base_name + "." + extension
    try:
        out_mtime = os.path.getmtime(out_name)
    except OSError:
        # No exported file yet: force the export below.
        out_mtime = -1
    if out_mtime < src_mtime:
        cmd_line = ['inkscape', export_flag, out_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # Inkscape's stdout is sent to stderr because the filter's own
        # stdout carries the JSON document back to pandoc.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write(
                "inkscape exited with status %d for %s\n" % (status, src))
    if attrs is not None:
        return Image(attrs, alt, [out_name, title])
    return Image(alt, [out_name, title])
if __name__ == "__main__":
    # Script entry point: hand the svg_to_any action to pandocfilters.
    toJSONFilter(svg_to_any)
@n1zzo
Copy link

n1zzo commented Nov 13, 2016

Hi, on my updated Arch linux system the filter gives the following error:

Traceback (most recent call last):
  File "../pandoc-svgmk2.py", line 43, in <module>
    toJSONFilter(svg_to_any)
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 46, in toJSONFilter
    toJSONFilters([action])
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in toJSONFilters
    altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
  File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in <lambda>
    altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
KeyError: 0
pandoc: Error running filter ../pandoc-svgmk2.py
Filter returned error status 1

Has anyone encountered the same error, or does anyone have an idea how to fix it?
I am using RedX2501 version of pandoc-svg.py with pandoc 1.18.

@orangecms
Copy link

Yes, simply rewrite the PKGBUILD from AUR to use version 1.4.1 of the pandocfilters package - see https://pypi.python.org/pypi/pandocfilters for the download link to replace it.

@DancingQuanta
Copy link

What about pdf_tex, using the option --export-latex? This generates two files: a PDF and a TeX file. The TeX file, which has the extension .pdf_tex, must be included in the document with \input{}. It imports the PDF file and places the text on top of the image, which allows the text in the image to be formatted by the LaTeX document.

@juji
Copy link

juji commented Jul 23, 2017

handle online image

#! /usr/bin/env python

"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""

__author__ = "Jerome Robert"

import mimetypes
import subprocess
import os
import sys
import urllib
import re
from pandocfilters import toJSONFilter, Str, Para, Image

# Output format -> (Inkscape export switch, extension of the exported file).
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # use PNG because EMF and WMF break transparency
    docx=("--export-png", "png"),
    # because of IE
    html=("--export-png", "png"),
)

def svg_to_any(key, value, fmt, meta):
    """Pandoc filter action replacing local or online SVG images.

    When ``key`` is ``'Image'``, the source is guessed to be SVG and ``fmt``
    has an entry in ``fmt_to_option``, Inkscape is run to export the SVG to
    the mapped file type. An ``http(s)://`` source is first downloaded into
    the current directory under a sanitised file name. The export is skipped
    when the exported file is already at least as recent as the SVG.

    Args:
        key: element type name handed over by pandocfilters.
        value: element payload; ``[alt, [src, title]]`` before pandoc 1.16,
            ``[attrs, alt, [src, title]]`` afterwards.
        fmt: pandoc output format name, used as key into ``fmt_to_option``.
        meta: document metadata (unused).

    Returns:
        An ``Image`` element pointing at the exported file, or ``None``
        (which pandocfilters interprets as "keep the element as it is")
        when the element is not a convertible SVG image.

    Raises:
        Whatever ``urlretrieve`` raises when a remote image cannot be
        downloaded; download errors are deliberately not swallowed.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value

    is_remote = re.match(r'https?://', src) is not None
    if is_remote:
        # Strip query string, fragment and trailing slash so that the
        # extension of the URL path can be used to guess the type.
        srcm = re.sub(r'\?.+', '', src)
        srcm = re.sub(r'#.+', '', srcm)
        srcm = re.sub(r'/$', '', srcm)
    else:
        srcm = src

    mimet, _ = mimetypes.guess_type(srcm)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    export_flag, extension = option

    if is_remote:
        # These helpers moved between Python 2 and Python 3.
        try:
            from urllib.parse import unquote
            from urllib.request import urlretrieve
        except ImportError:  # Python 2
            from urllib import unquote, urlretrieve
        # Keep only characters that are safe in a local file name.
        local_name = re.sub(r'[^a-zA-Z0-9.]', '',
                            unquote(os.path.basename(srcm)))
        src, _ = urlretrieve(src, local_name)
        base_name, _ = os.path.splitext(local_name)
    else:
        # Local file: the exported file sits next to the SVG.
        base_name, _ = os.path.splitext(src)
    out_name = base_name + "." + extension

    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # Missing or unreadable source: keep the original element rather
        # than aborting the whole filter.
        sys.stderr.write("Cannot access %s, image left unchanged\n" % src)
        return None
    try:
        out_mtime = os.path.getmtime(out_name)
    except OSError:
        # No exported file yet: force the export below.
        out_mtime = -1
    if out_mtime < src_mtime:
        cmd_line = ['inkscape', export_flag, out_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # Inkscape's stdout is sent to stderr because the filter's own
        # stdout carries the JSON document back to pandoc.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write(
                "inkscape exited with status %d for %s\n" % (status, src))
    if attrs is not None:
        return Image(attrs, alt, [out_name, title])
    return Image(alt, [out_name, title])

if __name__ == "__main__":
  # Script entry point: hand the svg_to_any action to pandocfilters.
  toJSONFilter(svg_to_any)

@jahomax
Copy link

jahomax commented Jun 5, 2019

An adjustment to @juji's reply: replace lines 47-51

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
           base_name,_ = os.path.splitext(bsnm)

with

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
               base_name,_ = os.path.splitext(bsnm)
           else:
               base_name, _ = os.path.splitext(src)

@whateverforever
Copy link

           if re.match('https?\://',src):
               bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
               bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
               src,h = urllib.urlretrieve(src,bsnm)
               base_name,_ = os.path.splitext(bsnm)
               eps_name = base_name + "." + option[1]
           else:
               base_name, _ = os.path.splitext(src)
               eps_name = os.path.realpath(base_name + "." + option[1])
               src = os.path.realpath(src)

One further adjustment: Inkscape crashes on macOS with ** (inkscape-bin:91102): WARNING **: Can't open file: image.svg (doesn't exist). This is caused by a sh wrapper around inkscape that sets a different working directory. Using os.path.realpath() fixes that.

@whateverforever
Copy link

whateverforever commented Jun 23, 2020

For the new Inkscape 1.0 on Catalina --export-filename has superseded --export-pdf

# Output format -> (Inkscape export switch, extension of the exported file).
# Inkscape 1.0 replaced the per-type --export-pdf / --export-png switches by
# --export-filename; the export type follows the file extension.
fmt_to_option = {
    "latex": ("--export-filename", "pdf"),
    "beamer": ("--export-filename", "pdf"),
    # use PNG because EMF and WMF break transparency
    "docx": ("--export-filename", "png"),
    # because of IE
    "html": ("--export-filename", "png"),
}

@ghamerly
Copy link

--export-filename should now be used in place of both --export-pdf and --export-png.

# Output format -> (Inkscape export switch, extension of the exported file);
# every format shares the same switch and differs only by extension.
fmt_to_option = {
    out_format: ("--export-filename", extension)
    for out_format, extension in (
        ("latex", "pdf"),
        ("beamer", "pdf"),
        # use PNG because EMF and WMF break transparency
        ("docx", "png"),
        # because of IE
        ("html", "png"),
    )
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment