Skip to content

Instantly share code, notes, and snippets.

@Kladdy
Last active April 28, 2024 12:12
Monkey patch for SVG files in python-docx
# encoding: utf-8
# This module monkey patches the docx library to add support for SVG images
# Put in a local folder and "import docx_svg_patch" to enable SVG support.
# Based on https://github.com/python-openxml/python-docx/pull/1107#issuecomment-1791518118
# Also based on https://gist.github.com/spillz/1667dd8b04654f32b51133cb7f72b898
from __future__ import absolute_import, division, print_function
import docx
from docx.image.exceptions import UnrecognizedImageError
from docx.image.constants import MIME_TYPE
from docx.image.exceptions import InvalidImageStreamError
from docx.image.helpers import BIG_ENDIAN, StreamReader
from docx.image.image import BaseImageHeader
import struct
import xml.etree.ElementTree as ET
def _ImageHeaderFactory(stream):
    """
    Return the image header object for the image in *stream*.

    The first 64 bytes of *stream* are compared against every
    `(class, offset, signature_bytes)` entry of `docx.image.SIGNATURES`; the
    first entry whose signature bytes are found at its offset has its
    `from_stream()` called with *stream*. |UnrecognizedImageError| is raised
    when no entry matches.
    """
    # Imported at call time so the signature table is looked up on every call
    # rather than captured once when this module is loaded.
    from docx.image import SIGNATURES

    stream.seek(0)
    leading_bytes = stream.read(64)

    for header_cls, start, magic in SIGNATURES:
        stop = start + len(magic)
        if leading_bytes[start:stop] == magic:
            return header_cls.from_stream(stream)

    raise UnrecognizedImageError
class Svg(BaseImageHeader):
    """
    Image header parser for SVG images.
    """

    @classmethod
    def from_stream(cls, stream):
        """
        Return |Svg| instance having header properties parsed from SVG image
        in *stream*.
        """
        px_width, px_height = cls._dimensions_from_stream(stream)
        # Both DPI values are fixed at 72.
        return cls(px_width, px_height, 72, 72)

    @property
    def content_type(self):
        """
        MIME content type for this image, unconditionally `image/svg+xml` for
        SVG images.
        """
        return MIME_TYPE.SVG

    @property
    def default_ext(self):
        """
        Default filename extension, always 'svg' for SVG images.
        """
        return "svg"

    @classmethod
    def _dimensions_from_stream(cls, stream):
        """
        Return a `(width, height)` pair of ints read from the root element of
        the SVG document in *stream*.

        The numeric part of the `width` and `height` attributes is used and
        rounded to the nearest integer; the unit suffix (as in '4cm' or
        '720pt') is dropped without any unit conversion. When an attribute is
        missing, is a percentage (as in '100%') or cannot be parsed, the
        corresponding size from the `viewBox` attribute is used instead.

        Raises |InvalidImageStreamError| when neither source yields both
        dimensions.
        """
        stream.seek(0)
        data = stream.read()
        root = ET.fromstring(data)

        # See https://www.w3.org/TR/SVG11/struct.html#NewDocument
        width = cls._parse_length(root.attrib.get("width"))
        height = cls._parse_length(root.attrib.get("height"))

        if width is None or height is None:
            box_width, box_height = cls._view_box_size(root.attrib.get("viewBox"))
            if width is None:
                width = box_width
            if height is None:
                height = box_height

        if width is None or height is None:
            raise InvalidImageStreamError(
                "SVG image has no usable width/height or viewBox"
            )

        # int() around round() keeps the result an int on Python 2 as well.
        return int(round(width)), int(round(height))

    @staticmethod
    def _parse_length(value):
        """
        Return the numeric part of the SVG length string *value* as a float.

        `None` is returned when *value* is `None`, empty, a percentage, or
        does not start with a parsable number.
        """
        if value is None:
            return None
        text = value.strip()
        # A percentage is relative to a container, so it carries no size.
        if not text or text.endswith("%"):
            return None
        # Strip the trailing unit identifier ('px', 'pt', 'cm', ...).
        number = text
        while number and number[-1].isalpha():
            number = number[:-1]
        try:
            return float(number)
        except ValueError:
            return None

    @staticmethod
    def _view_box_size(view_box):
        """
        Return the `(width, height)` floats of the `viewBox` attribute value
        *view_box* ('min-x min-y width height'), or `(None, None)` when it is
        absent or malformed.
        """
        if not view_box:
            return None, None
        # The four numbers may be separated by whitespace and/or commas.
        parts = view_box.replace(",", " ").split()
        if len(parts) != 4:
            return None, None
        try:
            return float(parts[2]), float(parts[3])
        except ValueError:
            return None, None
# Register the SVG header parser with python-docx.
docx.image.Svg = Svg
docx.image.constants.MIME_TYPE.SVG = 'image/svg+xml'
# Drop Svg entries left behind by an earlier import of this module (e.g. an
# IDE reloading it). Signatures are tried in order, so a stale class that was
# registered first would keep shadowing the one defined above.
_other_signatures = [
    sig for sig in docx.image.SIGNATURES
    if getattr(sig[0], '__name__', None) != 'Svg'
]
# Recognise SVG files with or without a leading XML declaration.
docx.image.SIGNATURES = tuple(_other_signatures + [
    (Svg, 0, b'<?xml version='),
    (Svg, 0, b'<svg'),
])
# Install the factory defined above in place of the library's own.
docx.image.image._ImageHeaderFactory = _ImageHeaderFactory
@nunamia
Copy link

nunamia commented Jan 11, 2024

_ImageHeaderFactory has the same problem,
for example with an SVG that begins with this text:

@nunamia
Copy link

nunamia commented Jan 11, 2024

<?xml version="1.0" encoding="UTF-8"?> <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="317.109" height="16.582" viewBox="0 0 317.109 16.582"> <defs> <g> <g id="glyph-0-0">

@Kladdy
Copy link
Author

Kladdy commented Jan 11, 2024

@nunamia Yes, I will update the gist with a new one that fixes that problem.

@Kladdy
Copy link
Author

Kladdy commented Jan 11, 2024

@nunamia The gist has now been updated and now it supports the correct SVG XML formatting. Please let me know if it works for you!

@nunamia
Copy link

nunamia commented Jan 12, 2024

Yes, it works. But I think it would be better to change the signatures to 'SVG_SIGNATURES = (
(Svg, 0, b'<svg '),
(Svg, 0, b'<?xml '),
(Svg, 0, b'<!DOCTYPE svg'),
)' and to change read_64 to read_256.

@goomesthiago
Copy link

I tried to use it, but I'm new to development and I still get the following error message:

"
File c:\users\thiag\untitled1.py:85 in _dimensions_from_stream
width = int(''.join([c for c in width_str if not c.isalpha() or c == '%']))

ValueError: invalid literal for int() with base 10: '717.005703'"

The only thing I did was save the docx_svg_patch.py file in the same folder as my main.py and try to add the image as usual with doc.add_picture

Is there anything that I'm missing?

@Kladdy
Copy link
Author

Kladdy commented Apr 24, 2024

@goomesthiago Looks like your SVG has a width with floating point value instead of integer (whole number) value. Try these two options, I would recommend the 1st one:

  1. Change int to round and float, in line 85 of untitled.py, so it would be width = round(float(''.join([c for c in width_str if not c.isalpha() or c == '%']))). This converts the string 717.005703 to a float and then rounds it to the nearest integer. You might need to do the same thing for height, so it should be height = round(float(''.join([c for c in height_str if not c.isalpha() or c == '%'])))
  2. Go into your .svg file and change the value 717.005703 to 717. Try running the command again and see if you also need to change some other parameter, perhaps height.

Let me know how it works out for you!

@goomesthiago
Copy link

goomesthiago commented Apr 24, 2024

@goomesthiago Looks like your SVG has a width with floating point value instead of integer (whole number) value. Try these two options, I would recommend the 1st one:

  1. Change int to round and float, in line 85 of untitled.py, so it would be width = round(float(''.join([c for c in width_str if not c.isalpha() or c == '%']))). This converts the string 717.005703 to a float and then rounds it to the nearest integer. You might need to do the same thing for height, so it should be height = round(float(''.join([c for c in height_str if not c.isalpha() or c == '%'])))
  2. Go into your .svg file and change the value 717.005703 to 717. Try running the command again and see if you also need to change some other parameter, perhaps height.

Let me know how it works out for you!

Hey, @Kladdy! Thanks for the help

I'm trying as you instructed me but I still receive the error message below.
Could you help me?

Reloaded modules: docx_svg_patch
Traceback (most recent call last):

File ~\anaconda3\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)

File c:\users\thiag\onedrive\work\nexo estudos\renato\exportar_figura_wmf\gera_docx_png_only.py:40
criar_docx_e_salvar_pdf(caminho_pasta)

File c:\users\thiag\gera_docx_png_only.py:22 in criar_docx_e_salvar_pdf
doc.add_picture(caminho_completo, width=Inches(6.45))

File ~\anaconda3\Lib\site-packages\docx\document.py:90 in add_picture
return run.add_picture(image_path_or_stream, width, height)

File ~\anaconda3\Lib\site-packages\docx\text\run.py:79 in add_picture
inline = self.part.new_pic_inline(image_path_or_stream, width, height)

File ~\anaconda3\Lib\site-packages\docx\parts\story.py:71 in new_pic_inline
rId, image = self.get_or_add_image(image_descriptor)

File ~\anaconda3\Lib\site-packages\docx\parts\story.py:37 in get_or_add_image
image_part = package.get_or_add_image_part(image_descriptor)

File ~\anaconda3\Lib\site-packages\docx\package.py:31 in get_or_add_image_part
return self.image_parts.get_or_add_image_part(image_descriptor)

File ~\anaconda3\Lib\site-packages\docx\package.py:74 in get_or_add_image_part
image = Image.from_file(image_descriptor)

File ~\anaconda3\Lib\site-packages\docx\image\image.py:52 in from_file
return cls._from_stream(stream, blob, filename)

File ~\anaconda3\Lib\site-packages\docx\image\image.py:164 in _from_stream
image_header = _ImageHeaderFactory(stream)

File ~\docx_svg_patch.py:43 in _ImageHeaderFactory
return cls.from_stream(stream)

File \docx_svg_patch.py:57 in from_stream
px_width, px_height = cls._dimensions_from_stream(stream)

File \docx_svg_patch.py:85 in _dimensions_from_stream
print(width_str)

ValueError: invalid literal for int() with base 10: '457.774606'

The dimensions in my .svg files are specified as follows:

@goomesthiago
Copy link

And another thing... is there any possibility of keeping the dimensions as float? Or just as int?

@goomesthiago
Copy link

When I change the width and height values in the .svg files to integers it works fine, but these dimensions are generated by matplotlib in another module of my script, and it writes them as floating-point numbers... :(

@Kladdy
Copy link
Author

Kladdy commented Apr 24, 2024

@goomesthiago Hmm, very weird. Could you perhaps send me the entire codebase you are using? Because if you are using round with no other arguments, the value should be an int.

You could try editing it so it becomes

width = int(round(float(''.join([c for c in width_str if not c.isalpha() or c == '%']))))
height = int(round(float(''.join([c for c in height_str if not c.isalpha() or c == '%']))))

Let me know how it works!

@goomesthiago
Copy link

goomesthiago commented Apr 24, 2024

@Kladdy yes I can!

Here's my docx_svg_patch.py file:

# This module monkey patches the docx library to add support for SVG images
# Put in a local folder and "import docx_svg_patch" to enable SVG support.
# Based on https://github.com/python-openxml/python-docx/pull/1107#issuecomment-1791518118
# Also based on https://gist.github.com/spillz/1667dd8b04654f32b51133cb7f72b898

from __future__ import absolute_import, division, print_function

import docx
from docx.image.exceptions import UnrecognizedImageError
from docx.image.constants import MIME_TYPE
from docx.image.exceptions import InvalidImageStreamError
from docx.image.helpers import BIG_ENDIAN, StreamReader
from docx.image.image import BaseImageHeader
import struct
import xml.etree.ElementTree as ET



def _ImageHeaderFactory(stream):
    """
    Return the image header object for the image in *stream*.

    The first 64 bytes of *stream* are compared against every
    `(class, offset, signature_bytes)` entry of `docx.image.SIGNATURES`; the
    first entry whose signature bytes are found at its offset has its
    `from_stream()` called with *stream*. |UnrecognizedImageError| is raised
    when no entry matches.
    """
    # Imported at call time so the signature table is looked up on every call
    # rather than captured once when this module is loaded.
    from docx.image import SIGNATURES

    stream.seek(0)
    leading_bytes = stream.read(64)

    for header_cls, start, magic in SIGNATURES:
        stop = start + len(magic)
        if leading_bytes[start:stop] == magic:
            return header_cls.from_stream(stream)

    raise UnrecognizedImageError

class Svg(BaseImageHeader):
    """
    Image header parser for SVG images.
    """

    @classmethod
    def from_stream(cls, stream):
        """
        Return |Svg| instance having header properties parsed from SVG image
        in *stream*.
        """
        px_width, px_height = cls._dimensions_from_stream(stream)
        # Both DPI values are fixed at 72.
        return cls(px_width, px_height, 72, 72)

    @property
    def content_type(self):
        """
        MIME content type for this image, unconditionally `image/svg+xml` for
        SVG images.
        """
        return MIME_TYPE.SVG

    @property
    def default_ext(self):
        """
        Default filename extension, always 'svg' for SVG images.
        """
        return "svg"

    @classmethod
    def _dimensions_from_stream(cls, stream):
        """
        Return a `(width, height)` pair of ints read from the root element of
        the SVG document in *stream*.

        The numeric part of the `width` and `height` attributes is used and
        rounded to the nearest integer; the unit suffix (as in '4cm' or
        '720pt') is dropped without any unit conversion. When an attribute is
        missing, is a percentage (as in '100%') or cannot be parsed, the
        corresponding size from the `viewBox` attribute is used instead.

        Raises |InvalidImageStreamError| when neither source yields both
        dimensions.
        """
        stream.seek(0)
        data = stream.read()
        root = ET.fromstring(data)

        # See https://www.w3.org/TR/SVG11/struct.html#NewDocument
        width = cls._parse_length(root.attrib.get("width"))
        height = cls._parse_length(root.attrib.get("height"))

        if width is None or height is None:
            box_width, box_height = cls._view_box_size(root.attrib.get("viewBox"))
            if width is None:
                width = box_width
            if height is None:
                height = box_height

        if width is None or height is None:
            raise InvalidImageStreamError(
                "SVG image has no usable width/height or viewBox"
            )

        # int() around round() keeps the result an int on Python 2 as well.
        return int(round(width)), int(round(height))

    @staticmethod
    def _parse_length(value):
        """
        Return the numeric part of the SVG length string *value* as a float.

        `None` is returned when *value* is `None`, empty, a percentage, or
        does not start with a parsable number.
        """
        if value is None:
            return None
        text = value.strip()
        # A percentage is relative to a container, so it carries no size.
        if not text or text.endswith("%"):
            return None
        # Strip the trailing unit identifier ('px', 'pt', 'cm', ...).
        number = text
        while number and number[-1].isalpha():
            number = number[:-1]
        try:
            return float(number)
        except ValueError:
            return None

    @staticmethod
    def _view_box_size(view_box):
        """
        Return the `(width, height)` floats of the `viewBox` attribute value
        *view_box* ('min-x min-y width height'), or `(None, None)` when it is
        absent or malformed.
        """
        if not view_box:
            return None, None
        # The four numbers may be separated by whitespace and/or commas.
        parts = view_box.replace(",", " ").split()
        if len(parts) != 4:
            return None, None
        try:
            return float(parts[2]), float(parts[3])
        except ValueError:
            return None, None


# Register the SVG header parser with python-docx.
docx.image.Svg = Svg
docx.image.constants.MIME_TYPE.SVG = 'image/svg+xml'
# Drop Svg entries left behind by an earlier import of this module (e.g. an
# IDE reloading it). Signatures are tried in order, so a stale class that was
# registered first would keep shadowing the one defined above.
_other_signatures = [
    sig for sig in docx.image.SIGNATURES
    if getattr(sig[0], '__name__', None) != 'Svg'
]
# Recognise SVG files with or without a leading XML declaration.
docx.image.SIGNATURES = tuple(_other_signatures + [
    (Svg, 0, b'<?xml version='),
    (Svg, 0, b'<svg'),
])
# Install the factory defined above in place of the library's own.
docx.image.image._ImageHeaderFactory = _ImageHeaderFactory


And here is the main code:

```# -*- coding: utf-8 -*-
"""
Created on Tue Apr 23 17:50:45 2024

@author: thiag
"""

import os
from docx import Document
from docx.shared import Inches
from comtypes.client import CreateObject
import docx_svg_patch

def criar_docx_e_salvar_pdf(caminho_pasta):
    """
    Build "Documento.docx" inside *caminho_pasta* from the folder's SVG files
    and convert it to PDF.

    Every directory entry whose name ends in ".svg" is added as a picture
    6.45 inches wide, followed by a paragraph of text. The document is saved
    in the same folder and then handed to `converter_docx_para_pdf`.
    """
    documento = Document()  # new, empty Word document

    for nome in os.listdir(caminho_pasta):
        if not nome.endswith(".svg"):
            continue
        # Insert the image, then a line of text after it.
        documento.add_picture(os.path.join(caminho_pasta, nome), width=Inches(6.45))
        documento.add_paragraph("Olá testando a feature")

    destino_docx = os.path.join(caminho_pasta, "Documento.docx")
    documento.save(destino_docx)  # write the .docx file

    destino_pdf = destino_docx.replace('.docx', '.pdf')
    converter_docx_para_pdf(destino_docx, destino_pdf)

def converter_docx_para_pdf(nome_docx, nome_pdf):
    """
    Save the Word document *nome_docx* as a PDF at *nome_pdf* by driving a
    'Word.Application' object created with `CreateObject`.

    The document is closed and the application is quit even when opening or
    saving raises, so a failed conversion does not leave them open.
    """
    word = CreateObject('Word.Application')
    try:
        doc = word.Documents.Open(nome_docx)
        try:
            doc.SaveAs(nome_pdf, FileFormat=17)  # FileFormat=17 is the "save as PDF" option
        finally:
            doc.Close()
    finally:
        word.Quit()


caminho_pasta = r'folder_with_the_svg_Files' # full path to the folder containing the images

criar_docx_e_salvar_pdf(caminho_pasta)```


An example of svg file I'm trying to use is attached
![vff_L75_P](https://gist.github.com/assets/49957343/0d1b99e9-c1ea-41e5-a1c3-5e068cb66f71)



@Kladdy
Copy link
Author

Kladdy commented Apr 28, 2024

For me, this works perfectly:

import os
from docx import Document
from docx.shared import Inches
import docx_svg_patch

def criar_docx_e_salvar_pdf(caminho_pasta):
    """
    Build "Documento.docx" inside *caminho_pasta* from the folder's SVG files.

    Every directory entry whose name ends in ".svg" is added as a picture
    6.45 inches wide, followed by a paragraph of text. The document is then
    saved in the same folder.
    """
    documento = Document()  # new, empty Word document

    for nome in os.listdir(caminho_pasta):
        if not nome.endswith(".svg"):
            continue
        # Insert the image, then a line of text after it.
        documento.add_picture(os.path.join(caminho_pasta, nome), width=Inches(6.45))
        documento.add_paragraph("Olá testando a feature")

    destino_docx = os.path.join(caminho_pasta, "Documento.docx")
    documento.save(destino_docx)  # write the .docx file

caminho_pasta = r'.' # full path to the folder containing the images

# Runs as soon as the script is executed.
criar_docx_e_salvar_pdf(caminho_pasta)

What version of Python and python-docx are you running? What error messages are you getting?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment