Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Memorization wizard - takes docx file as input, outputs memorization-friendly versions
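# This is a Python script that reads an Office DOCX file and writes two
# copies of it in which every word that is not bolded or italicized is
# shortened: the "fill" copy keeps the first letter of each word and replaces
# every following character with "_", and the "strip" copy keeps only the
# first letter of each word.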
# Imports.
from docx import Document
import re
import sys
import os
print('Memorization Wizard')
print('==================')

# Take the input path from the first command-line argument; when none was
# given, prompt the user for it instead.
if len(sys.argv) < 2:
    filename = input('Enter the name of the file to read: ')
else:
    filename = sys.argv[1]

# Stop early when the path is not an existing regular file. Exit with a
# non-zero status so that shells and calling scripts can detect the failure
# (a bare sys.exit() would report success).
if not os.path.isfile(filename):
    print('The file does not exist.')
    sys.exit(1)

print('Given file: \n ' + filename)

# Read the file into a python-docx Document object.
doc = Document(filename)
def rewrite(doc, mode):
    """Shorten every word in the non-bold, non-italic runs of *doc*.

    A "word" is a maximal run of ``\\w`` characters, so digits and
    underscores count as word characters and punctuation is kept as-is.

    Args:
        doc: A document object exposing ``paragraphs``, each of which exposes
            ``runs`` with ``bold``, ``italic`` and ``text`` attributes (a
            python-docx ``Document`` in this script).
        mode: ``'fill'`` keeps the first character of each word and replaces
            every following character with ``_``; ``'strip'`` keeps only the
            first character of each word.

    Returns:
        The same *doc* object that was passed in; it is modified in place.

    Raises:
        ValueError: If *mode* is neither ``'fill'`` nor ``'strip'``.
    """
    # Pick the per-word replacement once, instead of re-testing the mode for
    # every run. Rejecting unknown modes up front avoids silently returning
    # an untouched document.
    if mode == 'fill':
        def shorten(match):
            word = match.group(0)
            return word[0] + '_' * (len(word) - 1)
    elif mode == 'strip':
        def shorten(match):
            return match.group(0)[0]
    else:
        raise ValueError(
            "mode must be 'fill' or 'strip', got {0!r}".format(mode))

    word_pattern = re.compile(r'\w+')

    # Loop through the paragraphs and their runs.
    for para in doc.paragraphs:
        for run in para.runs:
            # If the run is bolded or italicized, don't change it.
            if run.bold or run.italic:
                continue
            original = run.text
            shortened = word_pattern.sub(shorten, original)
            # Only write back when something changed: per the python-docx
            # documentation, assigning Run.text replaces the run's existing
            # content, which would discard non-text content (e.g. an inline
            # picture) from runs that have no words to shorten.
            if shortened != original:
                run.text = shortened

    # Return the (mutated) document so calls can be chained with .save().
    return doc
# Build the output names from the input path with a trailing ".docx"
# extension removed. Only the suffix is stripped (case-insensitively), so a
# ".docx" that appears elsewhere in the path is left alone.
if filename.lower().endswith('.docx'):
    output_base = filename[:-len('.docx')]
else:
    output_base = filename

# Save a document with fill and strip modes.
# rewrite() modifies the document it is given in place, so the strip version
# is produced from a freshly loaded copy of the input rather than from the
# already-filled document.
rewrite(doc, 'fill').save('{0}_fill.docx'.format(output_base))
rewrite(Document(filename), 'strip').save('{0}_strip.docx'.format(output_base))
print('Wrote files\n - {0}_fill.docx\n - {0}_strip.docx'.format(output_base))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment