Skip to content

Instantly share code, notes, and snippets.

@KenjiOhtsuka
Created May 27, 2024 04:17
Show Gist options
  • Save KenjiOhtsuka/826057c59ec806009a70e207097b77f5 to your computer and use it in GitHub Desktop.
Save KenjiOhtsuka/826057c59ec806009a70e207097b77f5 to your computer and use it in GitHub Desktop.
Extract all images in a PowerPoint file
import pptx
from pptx.enum.shapes import MSO_SHAPE_TYPE
import os
def extract_images(pptx_path, output_dir):
    """Save every picture shape of a PowerPoint file into ``output_dir``.

    Each slide's ``shapes`` collection is walked in order. For every shape
    whose ``shape_type`` is ``MSO_SHAPE_TYPE.PICTURE`` the image bytes are
    written to ``<output_dir>/<n>.<ext>``, where ``n`` counts pictures from 1
    across the whole presentation and ``ext`` is the extension python-pptx
    reports for that image. The path of each written file is printed.

    Only shapes yielded directly by ``slide.shapes`` are examined.

    Args:
        pptx_path: Path of the presentation to read.
        output_dir: Directory that receives the image files. It is created
            when the first picture is found, so a presentation without
            pictures leaves no empty directory behind. An empty string
            means the current working directory.
    """
    presentation = pptx.Presentation(pptx_path)
    picture_count = 0
    for slide in presentation.slides:
        for shape in slide.shapes:
            if shape.shape_type != MSO_SHAPE_TYPE.PICTURE:
                continue
            picture_count += 1
            image = shape.image
            # Name the file after the image's real format instead of always
            # using ".png"; otherwise JPEG/GIF/etc. data is mislabelled.
            image_path = os.path.join(output_dir, f"{picture_count}.{image.ext}")
            # exist_ok avoids the check-then-create race of os.path.exists().
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(image_path, "wb") as f:
                f.write(image.blob)
            print(f"Extracted image: {image_path}")
if __name__ == "__main__":
    import sys

    # Usage: script.py [PPTX_PATH [OUTPUT_DIR]]
    # Both arguments are optional; the defaults match the previously
    # hard-coded values, so running without arguments behaves as before.
    cli_args = sys.argv[1:]
    source_path = cli_args[0] if len(cli_args) > 0 else "presentation.pptx"
    target_dir = cli_args[1] if len(cli_args) > 1 else "output"
    extract_images(source_path, target_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment