Created
May 12, 2020 16:40
-
-
Save ThibaudLamothe/17bf0cca3815f6614827257262e2a9ce to your computer and use it in GitHub Desktop.
Extracting text from the text frames of a .pptx file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pptx import Presentation | |
def get_texts_from_file(input_file):
    """Collect the text of every paragraph held in a presentation's text frames.

    Slides are visited in presentation order, then each slide's shapes, then
    the paragraphs of each shape's text frame. Shapes whose ``has_text_frame``
    attribute is false are skipped. No filtering is applied to the paragraph
    text itself: every paragraph contributes exactly one entry.

    Args:
        input_file: Value handed unchanged to ``Presentation`` (the script
            entry point of this file passes a path to a .pptx file).

    Returns:
        A list with one string per paragraph, in traversal order.
    """
    # Accumulates the paragraph texts across all slides
    texts = []

    # Load the presentation
    prs = Presentation(input_file)

    # For each slide in the presentation
    for slide in prs.slides:
        # For each shape in a slide
        for shape in slide.shapes:
            # Only shapes exposing a text frame carry paragraphs to read
            if not shape.has_text_frame:
                continue
            # One entry per paragraph of the text frame
            texts.extend(
                paragraph.text for paragraph in shape.text_frame.paragraphs
            )

    return texts
if __name__ == "__main__":
    # Example run: extract the paragraphs of a sample presentation and
    # report how many were found.
    pptx_path = '../ppt_translate.pptx'
    extracted = get_texts_from_file(pptx_path)
    paragraph_count = len(extracted)

    print('Texts has been selected.')
    print('Found {} paragraphs.'.format(paragraph_count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment