Skip to content

Instantly share code, notes, and snippets.

@vitchyr
Last active December 18, 2023 12:31
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save vitchyr/4894861de3fa8d4ffcba to your computer and use it in GitHub Desktop.
Save vitchyr/4894861de3fa8d4ffcba to your computer and use it in GitHub Desktop.
Convert PDF to multiple SVGs with Inkscape and pyPdf
"""
Author: Vitchyr Pong
Convert a PDF to SVG files, one per page. Note that this is pretty slow since
it makes a subprocess call to Inkscape's PDF-to-SVG command-line converter for
every page.
Requirements:
- Inkscape (https://inkscape.org/)
- pyPdf (http://pybrary.net/pyPdf/)
- A '/tmp' directory. If you do not have one, pass another directory with -t/--tmp_dir.
Use 'python pdf2svgs.py -h' for more information.
Example Usage:
$ python pdf2svgs.py path/to/foo.pdf path/to/svgs new_svg_name
will result in the new files:
path/to/svgs/new_svg_name_p0.svg
path/to/svgs/new_svg_name_p1.svg
etc.
"""
import argparse
import os
import subprocess
from pyPdf import PdfFileWriter, PdfFileReader
def split_pdf(fpath, name, tmp_dir):
    """
    Split a PDF into multiple PDFs, one per page.

    Parameters
    ----------
    fpath : string
        Path to the PDF to split.
    name : string
        Base name for the per-page files; page ``i`` is written as
        ``<name>_p<i>.pdf`` (pages are numbered from 0).
    tmp_dir : string
        A directory where the temporary PDFs are saved. It is not created
        here, so it must already exist.

    Return
    ------
    pdfs : list of strings
        Paths to the temporary PDFs, in page order.
    """
    pdfs = []
    # Keep the source file open while pages are copied out of it (the reader
    # may fetch page data on demand), and make sure it is closed afterwards.
    with open(fpath, "rb") as input_stream:
        input_pdf = PdfFileReader(input_stream)
        # range() instead of xrange() so the loop works on Python 2 and 3.
        for i in range(input_pdf.numPages):
            output = PdfFileWriter()
            output.addPage(input_pdf.getPage(i))
            new_path = os.path.join(tmp_dir, name + "_p{0}.pdf".format(i))
            with open(new_path, "wb") as output_stream:
                output.write(output_stream)
            pdfs.append(new_path)
    return pdfs
def file_name(path):
    """
    Get the name of a file without its directory and without its extension.

    Only the final extension is stripped, so dots inside the name are kept
    (e.g. "tmp/v1.2_p0.pdf" -> "v1.2_p0"). Cutting at the first dot instead
    would map every page of such a document to the same name.

    Parameters
    ----------
    path : string
        Path to a file.

    Return
    ------
    name : string
        The final path component with its last extension removed.
    """
    return os.path.splitext(os.path.basename(path))[0]
def pdf_to_svg(out_dir, fpath):
    """
    Convert a single PDF to an SVG by invoking the ``inkscape`` command.

    Parameters
    ----------
    out_dir : string
        Directory in which the SVG is written.
    fpath : string
        Path to the PDF. The SVG is named after it (via ``file_name``) with
        a .svg extension.

    Notes
    -----
    The exit status of the inkscape process is not inspected.
    """
    svg_path = os.path.join(out_dir, file_name(fpath) + ".svg")
    command = ["inkscape", "-l", svg_path, fpath]
    subprocess.call(command)
def main():
    """
    Command-line entry point.

    Parses the arguments, splits the input PDF into one temporary PDF per
    page, creates the SVG output directory if it is missing, and converts
    each page PDF to an SVG.
    """
    cli = argparse.ArgumentParser(description="Convert a PDF to svg files.")
    positionals = (
        ("pdf_path", "Path to the pdf."),
        ("svg_dir", "Directory to save the svg."),
        ("svg_name", "Base name of the svgs."),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    cli.add_argument(
        "-t",
        "--tmp_dir",
        default="/tmp",
        help="Where to save temporary PDFs.",
    )
    options = cli.parse_args()

    # Splitting happens before the output directory is touched.
    page_pdfs = split_pdf(options.pdf_path, options.svg_name, options.tmp_dir)

    svg_dir = options.svg_dir
    if not os.path.exists(svg_dir):
        os.makedirs(svg_dir)

    for page_pdf in page_pdfs:
        pdf_to_svg(svg_dir, page_pdf)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
@tensor-cp
Copy link

Why are even writing this code, I don't understand.

@K4ly4s
Copy link

K4ly4s commented Sep 3, 2023

Because you're awesome!

@apollo000104
Copy link

@tensor-cp
what is the meaning?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment