# encoding: utf-8 | |
# This module monkey patches the docx library to add support for SVG images | |
# Put in a local folder and "import docx_svg_patch" to enable SVG support. | |
# Based on https://github.com/python-openxml/python-docx/pull/1107#issuecomment-1791518118 | |
# Also based on https://gist.github.com/spillz/1667dd8b04654f32b51133cb7f72b898 | |
from __future__ import absolute_import, division, print_function | |
import docx | |
from docx.image.exceptions import UnrecognizedImageError | |
from docx.image.constants import MIME_TYPE | |
from docx.image.exceptions import InvalidImageStreamError | |
from docx.image.helpers import BIG_ENDIAN, StreamReader | |
from docx.image.image import BaseImageHeader | |
import struct | |
import xml.etree.ElementTree as ET | |
def _ImageHeaderFactory(stream):
    """
    Select the header parser for the image in *stream* and return the
    |BaseImageHeader| subclass instance it produces.

    The leading 64 bytes of *stream* are compared against each
    ``(cls, offset, signature_bytes)`` entry of ``docx.image.SIGNATURES``, in
    order. The first entry whose signature appears at its offset parses the
    stream. |UnrecognizedImageError| is raised when no entry matches.
    """
    from docx.image import SIGNATURES

    # Signature detection only ever looks at the start of the stream.
    stream.seek(0)
    header = stream.read(64)

    parser_cls = next(
        (
            cls
            for cls, offset, signature_bytes in SIGNATURES
            if header[offset:offset + len(signature_bytes)] == signature_bytes
        ),
        None,
    )
    if parser_cls is None:
        raise UnrecognizedImageError
    return parser_cls.from_stream(stream)
class Svg(BaseImageHeader):
    """
    Image header parser for SVG images.
    """

    @classmethod
    def from_stream(cls, stream):
        """
        Return |Svg| instance having header properties parsed from SVG image
        in *stream*.
        """
        px_width, px_height = cls._dimensions_from_stream(stream)
        # A fixed 72 dpi is reported for both axes.
        return cls(px_width, px_height, 72, 72)

    @property
    def content_type(self):
        """
        MIME content type for this image, unconditionally `image/svg+xml` for
        SVG images.
        """
        return MIME_TYPE.SVG

    @property
    def default_ext(self):
        """
        Default filename extension, always 'svg' for SVG images.
        """
        return "svg"

    @classmethod
    def _dimensions_from_stream(cls, stream):
        """
        Return a ``(width, height)`` pair of ints read from the root element
        of the SVG document in *stream*.

        The ``width``/``height`` attributes may carry a unit ('4cm', '720pt')
        or be fractional ('717.005703'); the numeric part is rounded to the
        nearest integer and the unit itself is ignored. When an attribute is
        missing or is a percentage ('100%'), the corresponding extent of the
        ``viewBox`` attribute is used instead.

        Raises ValueError when a dimension cannot be determined.
        """
        stream.seek(0)
        root = ET.fromstring(stream.read())

        # viewBox is "min-x min-y width height", separated by whitespace
        # and/or commas. See https://www.w3.org/TR/SVG11/coords.html
        view_box = root.attrib.get("viewBox", "").replace(",", " ").split()
        if len(view_box) == 4:
            box_width, box_height = view_box[2], view_box[3]
        else:
            box_width = box_height = None

        width = cls._length_to_px(root.attrib.get("width"), box_width)
        height = cls._length_to_px(root.attrib.get("height"), box_height)
        return width, height

    @staticmethod
    def _length_to_px(length_str, fallback_str=None):
        """
        Return the leading number of *length_str* rounded to an int, using
        *fallback_str* when *length_str* is None or a percentage.

        Raises ValueError when neither string yields a number.
        """
        import re  # local import: `re` is not among this module's top-level imports

        number = r"\s*([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)"
        for candidate in (length_str, fallback_str):
            # A percentage is relative to the viewport, so it gives no size by itself.
            if candidate is None or candidate.strip().endswith("%"):
                continue
            match = re.match(number, candidate)
            if match is not None:
                return int(round(float(match.group(1))))
        raise ValueError(
            "cannot determine SVG dimension from %r" % (length_str,)
        )
# Publish the SVG parser and its MIME type on the docx package.
docx.image.Svg = Svg
docx.image.constants.MIME_TYPE.SVG = 'image/svg+xml'

# An SVG file may open with an XML declaration, a DOCTYPE, or directly with
# the root element, so a signature is registered for each of those openings.
_SVG_SIGNATURES = [
    (Svg, 0, b'<?xml '),
    (Svg, 0, b'<!DOCTYPE svg'),
    (Svg, 0, b'<svg'),
]
# Drop Svg entries left by an earlier import of this module so that reloading
# it does not pile up duplicate signatures.
docx.image.SIGNATURES = tuple(
    [sig for sig in docx.image.SIGNATURES if sig[0].__name__ != Svg.__name__]
    + _SVG_SIGNATURES
)

# Route header detection through the factory defined in this module.
docx.image.image._ImageHeaderFactory = _ImageHeaderFactory
<?xml version="1.0" encoding="UTF-8"?> <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="317.109" height="16.582" viewBox="0 0 317.109 16.582"> <defs> <g> <g id="glyph-0-0">
@nunamia Yes, I will update the gist with a new one that fixes that problem.
@nunamia The gist has now been updated and now it supports the correct SVG XML formatting. Please let me know if it works for you!
Yes, it worked. But I think changing the signatures to 'SVG_SIGNATURES = (
(Svg, 0, b'<svg '),
(Svg, 0, b'<?xml '),
(Svg, 0, b'<!DOCTYPE svg'),
)' would be better, and read_64 should be changed to read_256.
I tried to use it but I'm newbie to development so I still get the following error message:
"
File c:\users\thiag\untitled1.py:85 in _dimensions_from_stream
width = int(''.join([c for c in width_str if not c.isalpha() or c == '%']))
ValueError: invalid literal for int() with base 10: '717.005703'"
The only thing I made was to save the docx_svg_patch.py file in the same path of my main.py and tried to add the image as usual with doc.add_picture
Is there anything that I'm missing?
@goomesthiago Looks like your SVG has a width with floating point value instead of integer (whole number) value. Try these two options, I would recommend the 1st one:
- Change `int` to `round` and `float` in line 85 of `untitled1.py`, so it would be `width = round(float(''.join([c for c in width_str if not c.isalpha() or c == '%'])))`. This converts the string `717.005703` to a `float` and then rounds it to the nearest integer. You might need to do the same thing for `height`, so it should be `height = round(float(''.join([c for c in height_str if not c.isalpha() or c == '%'])))`.
- Go into your `.svg` file and change the value `717.005703` to `717`. Try running the command again and see if you also need to change some other parameter, perhaps height.
Let me know how it works out for you!
@goomesthiago Looks like your SVG has a width with floating point value instead of integer (whole number) value. Try these two options, I would recommend the 1st one:
- Change `int` to `round` and `float` in line 85 of `untitled1.py`, so it would be `width = round(float(''.join([c for c in width_str if not c.isalpha() or c == '%'])))`. This converts the string `717.005703` to a `float` and then rounds it to the nearest integer. You might need to do the same thing for `height`, so it should be `height = round(float(''.join([c for c in height_str if not c.isalpha() or c == '%'])))`.
- Go into your `.svg` file and change the value `717.005703` to `717`. Try running the command again and see if you also need to change some other parameter, perhaps height.
Let me know how it works out for you!
Hey, @Kladdy! Thanks for the help
I'm trying as you instructed me but I still receive the error message below.
Could you help me?
Reloaded modules: docx_svg_patch
Traceback (most recent call last):
File ~\anaconda3\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\thiag\onedrive\work\nexo estudos\renato\exportar_figura_wmf\gera_docx_png_only.py:40
criar_docx_e_salvar_pdf(caminho_pasta)
File c:\users\thiag\gera_docx_png_only.py:22 in criar_docx_e_salvar_pdf
doc.add_picture(caminho_completo, width=Inches(6.45))
File ~\anaconda3\Lib\site-packages\docx\document.py:90 in add_picture
return run.add_picture(image_path_or_stream, width, height)
File ~\anaconda3\Lib\site-packages\docx\text\run.py:79 in add_picture
inline = self.part.new_pic_inline(image_path_or_stream, width, height)
File ~\anaconda3\Lib\site-packages\docx\parts\story.py:71 in new_pic_inline
rId, image = self.get_or_add_image(image_descriptor)
File ~\anaconda3\Lib\site-packages\docx\parts\story.py:37 in get_or_add_image
image_part = package.get_or_add_image_part(image_descriptor)
File ~\anaconda3\Lib\site-packages\docx\package.py:31 in get_or_add_image_part
return self.image_parts.get_or_add_image_part(image_descriptor)
File ~\anaconda3\Lib\site-packages\docx\package.py:74 in get_or_add_image_part
image = Image.from_file(image_descriptor)
File ~\anaconda3\Lib\site-packages\docx\image\image.py:52 in from_file
return cls._from_stream(stream, blob, filename)
File ~\anaconda3\Lib\site-packages\docx\image\image.py:164 in _from_stream
image_header = _ImageHeaderFactory(stream)
File ~\docx_svg_patch.py:43 in _ImageHeaderFactory
return cls.from_stream(stream)
File \docx_svg_patch.py:57 in from_stream
px_width, px_height = cls._dimensions_from_stream(stream)
File \docx_svg_patch.py:85 in _dimensions_from_stream
print(width_str)
ValueError: invalid literal for int() with base 10: '457.774606'
The way the dimensions are implemented in my .svg files is as it follows:
And another thing... is there any possibility of keeping the dimensions as float? Or just as int?
When I change the values of width and height in the .svg files to an integer number it works fine, but these dimensions are generated by matplotlib in another module of my script and it's generated like float numbers... :(
@goomesthiago Hmm, very weird. Could you perhaps send me the entire codebase you are using? Because if you are using `round` with no other arguments, the value should be an `int`.
You could try editing it so it becomes
width = int(round(float(''.join([c for c in width_str if not c.isalpha() or c == '%']))))
height = int(round(float(''.join([c for c in height_str if not c.isalpha() or c == '%']))))
Let me know how it works!
@Kladdy yes I can!
Here's my docx_svg_patch.py file:
# This module monkey patches the docx library to add support for SVG images
# Put in a local folder and "import docx_svg_patch" to enable SVG support.
# Based on https://github.com/python-openxml/python-docx/pull/1107#issuecomment-1791518118
# Also based on https://gist.github.com/spillz/1667dd8b04654f32b51133cb7f72b898
from __future__ import absolute_import, division, print_function
import docx
from docx.image.exceptions import UnrecognizedImageError
from docx.image.constants import MIME_TYPE
from docx.image.exceptions import InvalidImageStreamError
from docx.image.helpers import BIG_ENDIAN, StreamReader
from docx.image.image import BaseImageHeader
import struct
import xml.etree.ElementTree as ET
def _ImageHeaderFactory(stream):
    """
    Select the header parser for the image in *stream* and return the
    |BaseImageHeader| subclass instance it produces.

    The leading 64 bytes of *stream* are compared against each
    ``(cls, offset, signature_bytes)`` entry of ``docx.image.SIGNATURES``, in
    order. The first entry whose signature appears at its offset parses the
    stream. |UnrecognizedImageError| is raised when no entry matches.
    """
    from docx.image import SIGNATURES

    # Signature detection only ever looks at the start of the stream.
    stream.seek(0)
    header = stream.read(64)

    parser_cls = next(
        (
            cls
            for cls, offset, signature_bytes in SIGNATURES
            if header[offset:offset + len(signature_bytes)] == signature_bytes
        ),
        None,
    )
    if parser_cls is None:
        raise UnrecognizedImageError
    return parser_cls.from_stream(stream)
class Svg(BaseImageHeader):
    """
    Image header parser for SVG images.
    """

    @classmethod
    def from_stream(cls, stream):
        """
        Return |Svg| instance having header properties parsed from SVG image
        in *stream*.
        """
        px_width, px_height = cls._dimensions_from_stream(stream)
        # A fixed 72 dpi is reported for both axes.
        return cls(px_width, px_height, 72, 72)

    @property
    def content_type(self):
        """
        MIME content type for this image, unconditionally `image/svg+xml` for
        SVG images.
        """
        return MIME_TYPE.SVG

    @property
    def default_ext(self):
        """
        Default filename extension, always 'svg' for SVG images.
        """
        return "svg"

    @classmethod
    def _dimensions_from_stream(cls, stream):
        """
        Return a ``(width, height)`` pair of ints read from the root element
        of the SVG document in *stream*.

        The ``width``/``height`` attributes may carry a unit ('4cm', '720pt')
        or be fractional ('457.774606'); the numeric part is rounded to the
        nearest integer and the unit itself is ignored. When an attribute is
        missing or is a percentage ('100%'), the corresponding extent of the
        ``viewBox`` attribute is used instead.

        Raises ValueError when a dimension cannot be determined.
        """
        stream.seek(0)
        root = ET.fromstring(stream.read())

        # viewBox is "min-x min-y width height", separated by whitespace
        # and/or commas. See https://www.w3.org/TR/SVG11/coords.html
        view_box = root.attrib.get("viewBox", "").replace(",", " ").split()
        if len(view_box) == 4:
            box_width, box_height = view_box[2], view_box[3]
        else:
            box_width = box_height = None

        width = cls._length_to_px(root.attrib.get("width"), box_width)
        height = cls._length_to_px(root.attrib.get("height"), box_height)
        return width, height

    @staticmethod
    def _length_to_px(length_str, fallback_str=None):
        """
        Return the leading number of *length_str* rounded to an int, using
        *fallback_str* when *length_str* is None or a percentage.

        Raises ValueError when neither string yields a number.
        """
        import re  # local import: `re` is not among this module's top-level imports

        number = r"\s*([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)"
        for candidate in (length_str, fallback_str):
            # A percentage is relative to the viewport, so it gives no size by itself.
            if candidate is None or candidate.strip().endswith("%"):
                continue
            match = re.match(number, candidate)
            if match is not None:
                return int(round(float(match.group(1))))
        raise ValueError(
            "cannot determine SVG dimension from %r" % (length_str,)
        )
# Publish the SVG parser and its MIME type on the docx package.
docx.image.Svg = Svg
docx.image.constants.MIME_TYPE.SVG = 'image/svg+xml'

# An SVG file may open with an XML declaration, a DOCTYPE, or directly with
# the root element, so a signature is registered for each of those openings.
_SVG_SIGNATURES = [
    (Svg, 0, b'<?xml '),
    (Svg, 0, b'<!DOCTYPE svg'),
    (Svg, 0, b'<svg'),
]
# Drop Svg entries left by an earlier import of this module so that reloading
# it does not pile up duplicate signatures.
docx.image.SIGNATURES = tuple(
    [sig for sig in docx.image.SIGNATURES if sig[0].__name__ != Svg.__name__]
    + _SVG_SIGNATURES
)
docx.image.image._ImageHeaderFactory = _ImageHeaderFactory```
And here is the main code:
```# -*- coding: utf-8 -*-
"""
Created on Tue Apr 23 17:50:45 2024
@author: thiag
"""
import os
from docx import Document
from docx.shared import Inches
from comtypes.client import CreateObject
import docx_svg_patch
def criar_docx_e_salvar_pdf(caminho_pasta):
    """
    Build a Word document from every ".svg" file listed in *caminho_pasta*,
    save it in that folder as "Documento.docx", then convert it to PDF.
    """
    documento = Document()  # create the Word document
    for nome_arquivo in os.listdir(caminho_pasta):
        if not nome_arquivo.endswith(".svg"):
            continue
        # insert the image, followed by a paragraph of text
        caminho_imagem = os.path.join(caminho_pasta, nome_arquivo)
        documento.add_picture(caminho_imagem, width=Inches(6.45))
        documento.add_paragraph("Olá testando a feature")

    destino_docx = os.path.join(caminho_pasta, "Documento.docx")
    documento.save(destino_docx)  # write the .docx to disk
    destino_pdf = destino_docx.replace('.docx', '.pdf')
    converter_docx_para_pdf(destino_docx, destino_pdf)
def converter_docx_para_pdf(nome_docx, nome_pdf):
    """
    Convert the Word document *nome_docx* to a PDF saved at *nome_pdf*, by
    driving Word through COM automation.

    The document is closed and Word is quit even when opening or saving
    fails, so a failed conversion does not leave a Word process behind.
    """
    word = CreateObject('Word.Application')
    try:
        # Absolute paths: Word is a separate process and does not necessarily
        # share this script's working directory.
        doc = word.Documents.Open(os.path.abspath(nome_docx))
        try:
            doc.SaveAs(os.path.abspath(nome_pdf), FileFormat=17)  # FileFormat=17 saves as PDF
        finally:
            doc.Close()
    finally:
        word.Quit()
caminho_pasta = r'folder_with_the_svg_Files' #full path to the folder with the images
criar_docx_e_salvar_pdf(caminho_pasta)```
An example of svg file I'm trying to use is attached
![vff_L75_P](https://gist.github.com/assets/49957343/0d1b99e9-c1ea-41e5-a1c3-5e068cb66f71)
For me, this works perfectly:
import os
from docx import Document
from docx.shared import Inches
import docx_svg_patch
def criar_docx_e_salvar_pdf(caminho_pasta):
    """
    Build a Word document from every ".svg" file listed in *caminho_pasta*
    and save it in that folder as "Documento.docx".
    """
    documento = Document()  # create the Word document
    for nome_arquivo in os.listdir(caminho_pasta):
        if not nome_arquivo.endswith(".svg"):
            continue
        # insert the image, followed by a paragraph of text
        caminho_imagem = os.path.join(caminho_pasta, nome_arquivo)
        documento.add_picture(caminho_imagem, width=Inches(6.45))
        documento.add_paragraph("Olá testando a feature")

    destino_docx = os.path.join(caminho_pasta, "Documento.docx")
    documento.save(destino_docx)  # write the .docx to disk
# Script entry: process the SVG files in the current directory.
caminho_pasta = r'.' #full path to the folder with the images
criar_docx_e_salvar_pdf(caminho_pasta)
What version of Python and python-docx are you running? What error messages are you getting?
`_ImageHeaderFactory` has the same problem, for example with an SVG file that begins with this text: