Skip to content

Instantly share code, notes, and snippets.

@zealinux
Last active December 4, 2019 14:23
Show Gist options
  • Save zealinux/be1dace9600ed68ae31a55739e385779 to your computer and use it in GitHub Desktop.
Save zealinux/be1dace9600ed68ae31a55739e385779 to your computer and use it in GitHub Desktop.
python-pptx-utils
# https://github.com/scanny/python-pptx/issues/281
# Replace the text of the text frame's first paragraph, then color it.
# NOTE: `text_frame` and `text` are not defined in this snippet; they must
# already exist when it runs.
paragraph = text_frame.paragraphs[0]
paragraph.text = text
# The color is applied through the paragraph-level font object.
font = paragraph.font
# `RGBColor(...)` is a placeholder call: the literal `...` stands in for the
# real arguments, and no import of `RGBColor` is visible in this file.
font.color.rgb = RGBColor(...)
# https://stackoverflow.com/questions/51701626/how-to-extract-text-from-a-text-shape-within-a-group-shape-in-powerpoint-using
from pptx.enum.shapes import MSO_SHAPE_TYPE
# Print the text of every text-bearing shape that sits directly inside a
# group shape. `prs` must already refer to an open presentation.
for slide in prs.slides:
    # ---only operate on group shapes---
    group_shapes = [
        candidate
        for candidate in slide.shapes
        if candidate.shape_type == MSO_SHAPE_TYPE.GROUP
    ]
    for group_shape in group_shapes:
        for shape in group_shape.shapes:
            # Members without a text frame have nothing to print.
            if not shape.has_text_frame:
                continue
            print(shape.text)
# ----
from pptx.shapes.group import GroupShape
from pptx import Presentation
# Open the deck. `pptx_file` is not defined in this snippet and must be
# provided before this line runs.
prs = Presentation(pptx_file)
# Accumulator for collected text; nothing in this snippet appends to it.
textrun=[]
# Walk every shape on every slide.
# NOTE(review): the loop body is a no-op -- presumably it was meant to hand
# the shapes to checkrecursivelyfortext(); confirm before relying on it.
for slide in prs.slides:
    for shape in slide.shapes:
        pass
def checkrecursivelyfortext(shpthissetofshapes, textrun):
    """Collect the text of every shape, descending into group shapes.

    Each shape whose ``shape_type`` equals ``MSO_SHAPE_TYPE.GROUP`` is
    recursed into via its ``shapes`` collection. Every other shape that has
    a ``text`` attribute has that text printed and appended to ``textrun``.

    Args:
        shpthissetofshapes: Iterable of shape objects to inspect.
        textrun: List that receives the collected text; mutated in place.

    Returns:
        The ``textrun`` list, after all text found has been appended.
    """
    for item in shpthissetofshapes:
        if item.shape_type == MSO_SHAPE_TYPE.GROUP:
            # A group carries its own shape collection; recurse into it.
            textrun = checkrecursivelyfortext(item.shapes, textrun)
            continue
        if not hasattr(item, "text"):
            continue
        print(item.text)
        textrun.append(item.text)
    return textrun
# 1
def extract(filename, **kwargs):
    """Return the text of every run in a presentation, joined by blank lines.

    Only shapes placed directly on a slide and reporting ``has_text_frame``
    are read. ``kwargs`` is accepted but not used.

    Args:
        filename: Passed straight to ``Presentation``.

    Returns:
        All run texts in slide/shape/paragraph/run order, separated by
        two newlines.
    """

    def _run_texts(deck):
        # Yield each run's text in document order.
        for slide in deck.slides:
            for shape in slide.shapes:
                if shape.has_text_frame:
                    for paragraph in shape.text_frame.paragraphs:
                        yield from (run.text for run in paragraph.runs)

    return '\n\n'.join(_run_texts(Presentation(filename)))
# https://github.com/scanny/python-pptx/issues/285
def replace_paragraph_text_retaining_initial_formatting(paragraph, new_text):
    """Replace a paragraph's text while keeping the first run's formatting.

    Every run after the first is removed from the paragraph and the first
    run's text is set to ``new_text``, so whatever formatting that first run
    carries is retained. A paragraph that has no runs at all gets a new run
    holding ``new_text`` instead of raising ``IndexError``.

    Args:
        paragraph: Paragraph object exposing ``_p``, ``runs`` and ``add_run()``.
        new_text: Text the paragraph should contain afterwards.
    """
    p = paragraph._p  # the lxml element containing the `<a:p>` paragraph element
    runs = paragraph.runs
    if not runs:
        # Nothing to preserve formatting from; create the only run.
        paragraph.add_run().text = new_text
        return
    # remove all but the first run
    for extra_run in runs[1:]:
        p.remove(extra_run._r)
    runs[0].text = new_text
# Example usage. `textframe` must already refer to a text frame; the
# paragraph collection is `.paragraphs` (plural), as used everywhere else
# in this file.
paragraph = textframe.paragraphs[0] # or wherever you get the paragraph from
new_text = 'foobar'
replace_paragraph_text_retaining_initial_formatting(paragraph, new_text)
# https://stackoverflow.com/questions/37924808/python-pptx-power-point-find-and-replace-text-ctrl-h
def search_and_replace(search_str, repl_str, input, output):
    """Search and replace text in PowerPoint while preserving formatting.

    Opens the presentation at ``input``, replaces ``search_str`` with
    ``repl_str`` in every run of every paragraph of every top-level shape
    that has a text frame, and saves the result to ``output``. Replacement
    is done run by run so each run keeps its own formatting; as a
    consequence, a match that spans two runs is not replaced.

    Args:
        search_str: Text to look for (converted with ``str()``).
        repl_str: Replacement text (converted with ``str()``).
        input: Source presentation, passed to ``Presentation``.
        output: Destination passed to ``Presentation.save``.
    """
    # Useful links:
    # https://stackoverflow.com/questions/37924808/python-pptx-power-point-find-and-replace-text-ctrl-h
    # https://stackoverflow.com/questions/45247042/how-to-keep-original-text-formatting-of-text-with-python-powerpoint
    from pptx import Presentation

    # Convert once so the membership test and the replacement agree.
    search = str(search_str)
    replacement = str(repl_str)

    prs = Presentation(input)
    for slide in prs.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    # Only touch runs that actually contain the search text.
                    if search in run.text:
                        run.text = run.text.replace(search, replacement)
    prs.save(output)
#
prs = Presentation('blah.pptx')
# To get shapes in your slides
slides = list(prs.slides)
shapes = []
for slide in slides:
    # Add this slide's shapes to the flat list in their existing order.
    shapes.extend(slide.shapes)
def replace_text(self, replacements: dict, shapes: list):
    """Takes dict of {match: replacement, ... } and replaces all matches.

    Text frames are edited run by run, so each run keeps its formatting; a
    match that spans two runs is therefore not replaced. Table cells are
    edited by reassigning ``cell.text`` as a whole.
    Currently not implemented for charts or graphics.

    Args:
        self: Unused by the body; kept so existing callers are unaffected.
        replacements: Mapping of text to find -> text to insert. Keys and
            values are converted with ``str()``.
        shapes: Shape objects to process.
    """
    # Convert once so both branches treat non-string keys/values the same.
    pairs = [
        (str(match), str(replacement))
        for match, replacement in replacements.items()
    ]
    for shape in shapes:
        for match, replacement in pairs:
            if shape.has_text_frame:
                for paragraph in shape.text_frame.paragraphs:
                    for run in paragraph.runs:
                        # Leave runs without the match untouched.
                        if match in run.text:
                            run.text = run.text.replace(match, replacement)
            # Treat a shape that lacks a `has_table` attribute as not being a table.
            if getattr(shape, "has_table", False):
                for row in shape.table.rows:
                    for cell in row.cells:
                        if match in cell.text:
                            cell.text = cell.text.replace(match, replacement)
# `replace_text` declares a leading `self` parameter that its body never
# uses; pass None for it so the dict and the shape list bind to
# `replacements` and `shapes`.
replace_text(None, {'string to replace': 'replacement text'}, shapes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment