Skip to content

Instantly share code, notes, and snippets.

Last active September 9, 2021 04:47
Show Gist options
  • Save shimondoodkin/7471075 to your computer and use it in GitHub Desktop.
Save shimondoodkin/7471075 to your computer and use it in GitHub Desktop.
search and replace in an Open XML word document.
Open XML Format SDK 2.5 - from NuGet
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml;
namespace SearchAndReplaceInText
public class SearchAndReplaceInText
/// <summary>
/// Returns the zero-based start index of every occurrence of <paramref name="substr"/>
/// inside <paramref name="str"/>, in ascending order.
/// </summary>
/// <param name="str">The text to search in.</param>
/// <param name="substr">The text to search for.</param>
/// <param name="ignoreCase">
/// When true the search uses <see cref="StringComparison.OrdinalIgnoreCase"/>;
/// otherwise <see cref="StringComparison.Ordinal"/>.
/// </param>
/// <returns>
/// A list of start indexes. The list is empty when either argument is null, empty or
/// whitespace-only, or when there is no occurrence. Overlapping occurrences are all
/// reported because the search resumes one character after each hit.
/// </returns>
public static List<int> AllIndexesOf(string str, string substr, bool ignoreCase = false)
{
    var indexes = new List<int>();

    // Nothing to search in, or nothing to search for: report "no matches" instead of throwing.
    // Rejecting an empty needle also keeps the loop below from matching at every position.
    if (string.IsNullOrWhiteSpace(str) || string.IsNullOrWhiteSpace(substr))
    {
        return indexes;
    }

    StringComparison comparison = ignoreCase
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    int index = 0;
    while ((index = str.IndexOf(substr, index, comparison)) != -1)
    {
        indexes.Add(index);

        // Resume just past the start of this hit; IndexOf accepts a start index equal to
        // str.Length, so this is safe even when the hit is the last character.
        index++;
    }

    return indexes;
}
// Searches the concatenated text of all Text descendants of `el` for `from` and rewrites the
// affected Text elements so the matched characters are replaced by `to`.
//   el   - element whose descendant Text elements are searched and modified in place
//   from - the text to look for (case-sensitive: AllIndexesOf is called without ignoreCase)
//   to   - the replacement text; NOTE(review): no visible line reads `to`, presumably the
//          missing branch bodies in the per-character loop append it - confirm
// Visible phases: (1) concatenate and locate matches, (2) walk each Text element and rebuild the
// ones that overlap a match, (3) split texts containing '\n' into runs separated by <w:br/>,
// (4) remove the parent of every Text collected in todeleteempty.
// NOTE(review): this copy of the listing contains no braces and several statement bodies appear
// to be missing (see the notes below), so it does not compile as shown.
public static void openxml_replace_text(OpenXmlElement el, string from, string to)//version 2
// this is a quite smart and simple algorithm by Shimon Doodkin
// the idea is to concat all texts and search it as string.
// then replace text by positions step by step
// Phase 1: build one string out of every Text descendant, in document order.
StringBuilder innertext = new StringBuilder(); foreach (Text eltext in el.Descendants<Text>()) { innertext.Append(eltext.Text); } // maybe add a space if the previous element had no space at the end and this element has no space at the beginning, or add a new one, but this problem is only with tables..
string innertextstr = innertext.ToString();
// foundat[k] is the index of the first character of match k in the concatenated string.
List<int> foundat = AllIndexesOf(innertextstr, from);
// foundatend[k] is the index of the LAST character of match k (inclusive end).
List<int> foundatend = new List<int>();
for (int z = 0; z < foundat.Count; z++)
foundatend.Add(foundat[z] + from.Length - 1);
//if (foundat.Count != 0)
// Console.WriteLine("from:'" + from + "' between " + foundat[0] + " to " + foundatend[0]);
// for (int i = 0; i < innertextstr.Length; i++)
// {
// Console.WriteLine(" [" + i + "]: " + ((int)innertextstr[i]) + " '" + innertextstr[i] + "'");
// }
//Console.WriteLine(innertext.ToString().Contains(from) ? "contains" : "not found");
// Work lists for the post-processing phases at the end of the method.
// NOTE(review): no visible statement adds to any of these three lists - presumably the
// Add calls were lost from this copy; confirm against the original.
List<Text> tofixnewlines = new List<Text>();
List<Text> todeleteempty = new List<Text>();
List<string> tofixnewlines_str = new List<string>() ;
// Inclusive [from, to] range that the current Text element occupies in the concatenated string.
// currenttext_to starts at -1 so the first element begins at offset 0.
int currenttext_from = 0, currenttext_to = -1;
int innertextpos = 0;
// Phase 2: only walk the elements when there is at least one match.
if (foundat.Count != 0)
foreach (Text eltext in el.Descendants<Text>())
currenttext_from = currenttext_to + 1;
currenttext_to += eltext.Text.Length;
//Console.WriteLine("currenttext_from: " + currenttext_from + " currenttext_to: " + currenttext_to);
// Stop once the pending-match list is empty.
// NOTE(review): the statement that removes a finished match from foundat/foundatend is not
// visible in this copy.
if (foundat.Count == 0) break;
// Does this Text element overlap the first pending match?
if (foundat.First() <= currenttext_from && currenttext_from <= foundatend.First() // the beginning of this block is inside a found
|| foundat.First() <= currenttext_to && currenttext_to <= foundatend.First() // the end of this block is inside a found
// NOTE(review): as written the next clause needs currenttext_to <= foundat.First() <= foundatend.First() <= currenttext_to,
// i.e. a one-character match sitting exactly on the block's last character. A match lying
// strictly inside the block would need `currenttext_from <= foundat.First()` here - likely a bug.
|| currenttext_to <= foundat.First() && foundatend.First() <= currenttext_to // found is inside block
// Rebuilt content for this Text element.
StringBuilder newtext = new StringBuilder();
//is innertextpos in a match?
innertextpos = currenttext_from;
// Visit each character of this Text element.
// NOTE(review): innertextpos is presumably advanced once per character, but the increment
// is not visible in this copy.
for (int curchar = 0; curchar < eltext.Text.Length; curchar++)
if (foundat.Count == 0) break;
// Case: this position is the first character of the pending match (branch body not visible).
if (innertextpos == foundat.First())
// Case: this position is somewhere inside the pending match.
else if (innertextpos >= foundat.First() && innertextpos <= foundatend.First())
// Offset of this position relative to the start of the match.
int replacewithcharat = innertextpos - foundat.First();
// Case: this position is the last character of the pending match (branch body not visible).
if (innertextpos == foundatend.First())
//if (replacewithcharat < to.Length)
//newtext.Append(to.Substring(replacewithcharat + 1));
//append add rest;
string newtextstr = newtext.ToString();
// Without a newline the rebuilt text can be written straight back into the element.
if (newtextstr.IndexOf('\n') == -1)
eltext.Text = newtextstr;
// Placeholder text; NOTE(review): presumably this is the else-branch for texts that contain
// '\n' and are later overwritten by the newline-fixing loop - the `else` is not visible here.
eltext.Text = "to be replaced";
// Empty result; NOTE(review): presumably the element is queued in todeleteempty here - body not visible.
if (newtextstr.Length == 0)
// NOTE(review): the lines from here through the <w:docGrid ... /> line are a sample
// WordprocessingML fragment, not C#; presumably they sat inside a block comment whose
// delimiters are missing from this copy.
* example word document with a newline
<w:p w:rsidR="00377636" w:rsidRDefault="00F653FC">
<w:rtl />
<w:rFonts w:hint="cs" />
<w:rtl />
<w:br />
<w:bookmarkStart w:id="0" w:name="_GoBack" />
<w:bookmarkEnd w:id="0" />
<w:sectPr w:rsidR="00377636" w:rsidSect="002510AE">
<w:pgSz w:w="11906" w:h="16838" />
<w:pgMar w:top="1440" w:right="1800" w:bottom="1440" w:left="1800" w:header="708" w:footer="708" w:gutter="0" />
<w:cols w:space="708" />
<w:bidi />
<w:rtlGutter />
<w:docGrid w:linePitch="360" />
// else
// Console.WriteLine(eltext.OuterXml);
//fix newlines:
// Phase 3: for every queued Text, split its stored string on '\n' (after dropping '\r').
// The first line stays in the original Text; each further line gets a clone of the parent.
for (int i = 0; i < tofixnewlines.Count; i++)
string[] lines = tofixnewlines_str[i].Replace("\r", "").Split('\n');
Text last_el = tofixnewlines[i];
OpenXmlElement newline_el;
OpenXmlElement copy_el;
last_el.Text = lines[0];
Text next_el;
for (int j = 1; j < lines.Length; j++)
//create nextline text
// Deep-clone the parent of the previous Text and put the next line into the clone's first Text.
// NOTE(review): no visible statement inserts copy_el into the document - presumably an
// InsertAfterSelf(copy_el) call is missing from this copy; confirm.
copy_el = last_el.Parent.CloneNode(true);
next_el = copy_el.Descendants<Text>().First();
next_el.Text = lines[j];
//create newline //"<w:r><w:rPr><w:rFonts w:hint="cs" /><w:rtl /></w:rPr><w:br /></w:r>"
// Second clone of the parent, stripped down to its rPr child, then given a Break child.
newline_el = last_el.Parent.CloneNode(true);
// NOTE(review): `se` is a lazy query over newline_el.ChildElements and items are removed
// while it is being enumerated - verify this is safe with the SDK's child collection.
IEnumerable<OpenXmlElement> se = newline_el.ChildElements.Where(e => e.LocalName != "rPr");
foreach (OpenXmlElement item in se) item.Remove();
newline_el.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Break());//<w:br />
last_el.Parent.InsertAfterSelf(newline_el);//add a newline after the last_el.Parent (the add order is switched, I always add after the first element but in reverse order)
// Continue from the Text that now holds the line just processed.
last_el = next_el;
// Phase 4: remove the parent of every Text that was queued as empty.
for (int i = 0; i <todeleteempty.Count; i++)
Text eltext = todeleteempty[i];
//if (eltext.Parent.ChildElements.Count <= 2 && newtextstr.Length == 0)// run.children<=2 means the Run contains only w:rPr and w:t or just w:t
// {
eltext.Parent.Remove();//remove empty run, not sure if this is good, I don't know, maybe a run could contain other elements besides text, like images.
// }
Copy link

r2d2tm commented Mar 19, 2020

this code cannot fully work...

The following condition can never be true:
|| currenttext_to <= foundat.First() && foundatend.First() <= currenttext_to // found is inside block
It must be replaced with
|| currenttext_from <= foundat.First() && foundatend.First() <= currenttext_to // found is inside block

and with the new condition the code does not work properly....
example :
<w:t>#AGENCE# - #AgenceCP# #AgenceVille#</w:t>
became, in MS Word: AGENCE PAS DE CALAIS- 62000ceVille#
the last tag is truncated...

Copy link

I'm using Open XML and need to change the value of a header tag in a Word template file. How can I do that? Please let me know.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment