Skip to content

Instantly share code, notes, and snippets.

@ThibaudLamothe
Created May 12, 2020 16:40
Show Gist options
  • Save ThibaudLamothe/17bf0cca3815f6614827257262e2a9ce to your computer and use it in GitHub Desktop.
Save ThibaudLamothe/17bf0cca3815f6614827257262e2a9ce to your computer and use it in GitHub Desktop.
Extracting the text of every paragraph from the text frames of a PowerPoint (.pptx) file
from pptx import Presentation
def get_texts_from_file(input_file):
    """Collect the text of every paragraph found in a presentation's text frames.

    Args:
        input_file: Passed unchanged to ``Presentation``; the caller in this
            file supplies the path of a ``.pptx`` file.

    Returns:
        list: One entry per paragraph (``paragraph.text``), ordered by slide,
        then by shape within the slide, then by paragraph within the shape.
        Paragraphs are appended without filtering, so empty ones are kept.
    """
    # Load the presentation
    prs = Presentation(input_file)

    # Accumulates the paragraph texts in traversal order
    texts = []

    # For each slide in the presentation
    for slide in prs.slides:
        # For each shape in the slide
        for shape in slide.shapes:
            # Shapes whose "has_text_frame" is false have no paragraphs to read
            if not shape.has_text_frame:
                continue
            # For each paragraph of the text_frame
            for paragraph in shape.text_frame.paragraphs:
                texts.append(paragraph.text)

    return texts
if __name__ == "__main__":
    # Extract the paragraphs from the presentation at a hard-coded relative
    # path and report how many were found.
    pptx_path = '../ppt_translate.pptx'
    paragraph_texts = get_texts_from_file(pptx_path)
    paragraph_count = len(paragraph_texts)

    print('Texts has been selected.')
    print('Found {} paragraphs.'.format(paragraph_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment