Created
May 27, 2024 04:17
-
-
Save KenjiOhtsuka/826057c59ec806009a70e207097b77f5 to your computer and use it in GitHub Desktop.
Extract all images in a PowerPoint file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pptx | |
from pptx.enum.shapes import MSO_SHAPE_TYPE | |
import os | |
# Extract every picture embedded in a PowerPoint file.
def _iter_picture_shapes(shapes):
    """Yield each picture shape in *shapes*, descending into group shapes."""
    for shape in shapes:
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            yield shape
        elif shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            # Pictures inside a group are not direct children of the slide,
            # so walk the group's own shape collection recursively.
            yield from _iter_picture_shapes(shape.shapes)


def extract_images(pptx_path, output_dir):
    """Save every picture found in a presentation to *output_dir*.

    Pictures are numbered 1, 2, 3, ... in slide order (and shape order
    within each slide, including pictures nested in group shapes). Each file
    is written with the extension that matches its actual image format
    (e.g. ``1.png``, ``2.jpg``) rather than a hard-coded ``.png``.

    Args:
        pptx_path: Path to the ``.pptx`` file to read.
        output_dir: Directory to write the images into. It is created on
            demand when the first picture is written, so no directory is
            created for a presentation that contains no pictures.
    """
    ppt = pptx.Presentation(pptx_path)
    count = 0
    for slide in ppt.slides:
        for shape in _iter_picture_shapes(slide.shapes):
            count += 1
            image = shape.image
            # image.ext is the canonical extension for the image's content
            # type, so the file name always matches the bytes written.
            image_path = os.path.join(output_dir, f"{count}.{image.ext}")
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(output_dir, exist_ok=True)
            with open(image_path, "wb") as f:
                f.write(image.blob)
            print(f"Extracted image: {image_path}")
if __name__ == "__main__":
    # Script entry point: pull the pictures out of "presentation.pptx"
    # (looked up in the current working directory) into the "output" folder.
    extract_images(pptx_path="presentation.pptx", output_dir="output")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment