adejones/python-docx-replace-text-and-retain-style.py

## 91 changes: 91 additions & 0 deletions python-docx-replace-text-and-retain-style.py
@@ -0,0 +1,91 @@

    def docx_replace(doc, data):
def docx_replace(doc, data):
    """Replace ``${key}`` placeholders in a document's paragraphs.

    The paragraphs scanned are ``doc.paragraphs`` plus the paragraphs of
    every cell of every row of each table in ``doc.tables``. A placeholder
    may be split over several consecutive runs; the replacement text is
    written into the first run that holds part of the placeholder and the
    remaining pieces are cut out of the following runs. Only the ``text``
    attribute of the affected runs is reassigned; no runs are added or
    removed.

    Args:
        doc: Object exposing ``paragraphs`` and ``tables`` (with
            ``rows`` -> ``cells`` -> ``paragraphs``), such as a python-docx
            document. Every paragraph must expose ``runs`` whose items have a
            readable and writable ``text`` attribute.
        data: Mapping of placeholder name to replacement value. Values are
            converted with ``str()``.

    Returns:
        None. ``doc`` is modified in place.
    """
    paragraphs = list(doc.paragraphs)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                paragraphs.extend(cell.paragraphs)

    # use placeholders in the form ${PlaceholderName}
    # Built once up front: the pairs do not depend on the paragraph.
    substitutions = [
        ('${{{}}}'.format(key), str(val)) for key, val in data.items()
    ]

    for paragraph in paragraphs:
        runs = paragraph.runs
        for placeholder, replacement in substitutions:
            _replace_across_runs(runs, placeholder, replacement)


def _replace_across_runs(runs, placeholder, replacement):
    """Replace every occurrence of *placeholder* in the joined run text."""
    if not placeholder:
        # An empty needle matches everywhere and would never terminate.
        return

    search_from = 0
    while True:
        texts = [run.text for run in runs]
        match_start = ''.join(texts).find(placeholder, search_from)
        if match_start == -1:
            return
        match_end = match_start + len(placeholder)

        run_start = 0
        replacement_written = False
        for run, text in zip(runs, texts):
            run_end = run_start + len(text)
            if run_end > match_start and run_start < match_end:
                # Slice by position so identical fragments elsewhere in the
                # same run are left alone.
                cut_from = max(match_start - run_start, 0)
                cut_to = min(match_end, run_end) - run_start
                inserted = '' if replacement_written else replacement
                replacement_written = True
                run.text = text[:cut_from] + inserted + text[cut_to:]
            run_start = run_end
            if run_start >= match_end:
                break

        # Resume after the inserted text so a replacement that itself
        # contains the placeholder is not expanded again.
        search_from = match_start + len(replacement)


    # usage
# usage
# Example run: fills the placeholders of a template and saves the result.
# Kept under the __main__ guard so importing this module for docx_replace
# does not try to open the template file.
if __name__ == "__main__":
    import docx  # provided by the python-docx package

    doc = docx.Document('path/to/template.docx')
    docx_replace(doc, dict(ItemOne='replacement text', ItemTwo="Some replacement text\nand some more"))
    doc.save('path/to/destination.docx')