Skip to content

Instantly share code, notes, and snippets.

@marciocrmendes
Last active November 23, 2017 14:15
Show Gist options
  • Save marciocrmendes/33798ef5a077b264127532ba28c917a1 to your computer and use it in GitHub Desktop.
Save marciocrmendes/33798ef5a077b264127532ba28c917a1 to your computer and use it in GitHub Desktop.
Implementação do código do Shimon Doodkin que procura e substitui valores em documentos Word via OpenXml
using DocumentFormat.OpenXml.Packaging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Projeto.Commons.Services
{
public static class OpenXmlService
{
/// <summary>
/// Finds the start index of every non-overlapping occurrence of <paramref name="value"/>
/// inside <paramref name="str"/>, scanning from left to right.
/// </summary>
/// <param name="str">The text to search.</param>
/// <param name="value">The substring to look for; must not be null or empty.</param>
/// <returns>
/// The zero-based start indexes in ascending order; an empty list when there is no occurrence.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="value"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
private static List<int> AllIndexesOf(string str, string value)
{
    if (String.IsNullOrEmpty(value))
    {
        throw new ArgumentException("the string to find may not be empty", "value");
    }
    if (str == null)
    {
        throw new ArgumentNullException("str");
    }

    List<int> indexes = new List<int>();

    // Ordinal comparison: callers translate these indexes into character positions and
    // assume every match is exactly value.Length characters long. A culture-sensitive
    // search may ignore characters or match sequences of a different length.
    int index = str.IndexOf(value, 0, StringComparison.Ordinal);
    while (index != -1)
    {
        indexes.Add(index);
        // Continue after the end of this match so that matches never overlap.
        index = str.IndexOf(value, index + value.Length, StringComparison.Ordinal);
    }
    return indexes;
}
/// <summary>
/// Replaces the occurrences of <paramref name="target"/> with <paramref name="replace"/>
/// in the paragraphs found beneath the given element.
/// </summary>
/// <param name="element">The element whose descendant paragraphs are processed.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
public static void ReplaceInElement(this DocumentFormat.OpenXml.OpenXmlElement element, string target, string replace)
{
    // Static-call form; binds to the OpenXmlElement overload of RunThroughElements.
    RunThroughElements(element, target, replace);
}
/// <summary>
/// Replaces the occurrences of <paramref name="target"/> with <paramref name="replace"/>
/// in the paragraphs of a single package part.
/// </summary>
/// <param name="part">The part to process (header, footer or any other part).</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
public static void ReplaceInPart(this DocumentFormat.OpenXml.Packaging.OpenXmlPart part, string target, string replace)
{
    // Static-call form; binds to the OpenXmlPart overload of RunThroughElements.
    RunThroughElements(part, target, replace);
}
/// <summary>
/// Replaces the occurrences of <paramref name="target"/> with <paramref name="replace"/>
/// in the paragraphs of every part in the sequence.
/// </summary>
/// <param name="parts">The parts to process, one after another.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
public static void ReplaceInParts(this IEnumerable<DocumentFormat.OpenXml.Packaging.OpenXmlPart> parts, string target, string replace)
{
    // Static-call form; binds to the IEnumerable<OpenXmlPart> overload of RunThroughElements.
    RunThroughElements(parts, target, replace);
}
/// <summary>
/// Replaces the occurrences of <paramref name="target"/> with <paramref name="replace"/>
/// across the document: first the header parts, then the document itself, then the footer parts.
/// </summary>
/// <param name="mainDocumentPart">The main document part whose headers, document and footers are processed.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
public static void ReplaceInDocument(this DocumentFormat.OpenXml.Packaging.MainDocumentPart mainDocumentPart, string target, string replace)
{
    // Headers.
    ReplaceInParts(mainDocumentPart.HeaderParts, target, replace);

    // Document root element.
    ReplaceInElement(mainDocumentPart.Document, target, replace);

    // Footers.
    ReplaceInParts(mainDocumentPart.FooterParts, target, replace);
}
/// <summary>
/// Collects every paragraph beneath <paramref name="element"/> and hands the sequence to
/// RunThroughParagraphs for replacement.
/// </summary>
/// <param name="element">The element whose descendant paragraphs are processed.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
private static void RunThroughElements(this DocumentFormat.OpenXml.OpenXmlElement element, string target, string replace)
{
    IEnumerable<DocumentFormat.OpenXml.Wordprocessing.Paragraph> paragraphs =
        element.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>();
    RunThroughParagraphs(paragraphs, target, replace);
}
/// <summary>
/// Runs the replacement over the paragraphs of a single part. A header part is read through
/// its Header, a footer part through its Footer, and any other part through its RootElement.
/// </summary>
/// <remarks>
/// A part that exposes no root element is skipped instead of failing with a
/// NullReferenceException.
/// </remarks>
/// <param name="part">The part to process.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
private static void RunThroughElements(this DocumentFormat.OpenXml.Packaging.OpenXmlPart part, string target, string replace)
{
    DocumentFormat.OpenXml.OpenXmlElement root;

    // 'as' plus an explicit null test replaces the IsEqualType check followed by an
    // unchecked cast; it also accepts types derived from HeaderPart/FooterPart.
    HeaderPart headerPart = part as HeaderPart;
    FooterPart footerPart = part as FooterPart;
    if (headerPart != null)
    {
        root = headerPart.Header;
    }
    else if (footerPart != null)
    {
        root = footerPart.Footer;
    }
    else
    {
        root = part.RootElement;
    }

    if (root == null)
    {
        // Nothing to traverse in this part.
        return;
    }

    root.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>()
        .RunThroughParagraphs(target, replace);
}
/// <summary>
/// Runs the replacement over each part of the sequence, in enumeration order.
/// </summary>
/// <param name="parts">The parts to process.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
private static void RunThroughElements(this IEnumerable<DocumentFormat.OpenXml.Packaging.OpenXmlPart> parts, string target, string replace)
{
    // Each item is statically an OpenXmlPart, so the call binds to the single-part overload.
    parts.ForEach(current => RunThroughElements(current, target, replace));
}
/// <summary>
/// Applies the replacement to every paragraph whose inner text is not null, empty or
/// whitespace-only.
/// </summary>
/// <param name="paragraphList">The paragraphs to inspect.</param>
/// <param name="target">The text to search for.</param>
/// <param name="replace">The text written in place of each occurrence.</param>
private static void RunThroughParagraphs(this IEnumerable<DocumentFormat.OpenXml.Wordprocessing.Paragraph> paragraphList, string target, string replace)
{
    // Snapshot the sequence first: callers pass a lazy Descendants() query, and
    // ReplaceAllTexts inserts and removes runs in the same tree. Walking a materialized
    // list keeps the traversal independent of those structural changes.
    List<DocumentFormat.OpenXml.Wordprocessing.Paragraph> snapshot = paragraphList.ToList();

    foreach (DocumentFormat.OpenXml.Wordprocessing.Paragraph paragraph in snapshot)
    {
        // The text is read right before processing, as before, so a paragraph already
        // emptied by an earlier replacement is skipped.
        if (string.IsNullOrWhiteSpace(paragraph.InnerText))
        {
            continue;
        }
        paragraph.ReplaceAllTexts(target, replace);
    }
}
/// <summary>
/// Invokes <paramref name="action"/> once for each item of <paramref name="source"/>,
/// in enumeration order.
/// </summary>
/// <typeparam name="T">The item type of the sequence.</typeparam>
/// <param name="source">The sequence to walk.</param>
/// <param name="action">The callback invoked with each item.</param>
private static void ForEach<T>(this IEnumerable<T> source, Action<T> action)
{
    foreach (T current in source)
    {
        action(current);
    }
}
/// <summary>
/// Replaces every occurrence of <paramref name="from"/> with <paramref name="to"/> in the
/// text beneath <paramref name="element"/>, including occurrences that are split across
/// several text elements.
/// </summary>
/// <remarks>
/// Based on the algorithm by Shimon Doodkin (https://gist.github.com/shimondoodkin/7471075):
/// the values of all descendant text elements are concatenated, the occurrences are located
/// in that single string, and each text element is then rebuilt character by character.
/// The replacement text is written at the position where a match starts; every other
/// character of the match is dropped, whichever text element it lives in.
/// Line feeds in the rebuilt text are turned into separate runs separated by line breaks.
/// A text element that ends up empty is removed, together with its parent when nothing
/// but run properties is left in it.
/// </remarks>
/// <param name="element">The element whose descendant text elements are rewritten.</param>
/// <param name="from">The text to search for; must not be null or empty.</param>
/// <param name="to">The replacement text; may contain line feeds.</param>
private static void ReplaceAllTexts(this DocumentFormat.OpenXml.OpenXmlElement element, string from, string to)
{
    // Snapshot the text elements: the tree is modified further down.
    List<DocumentFormat.OpenXml.Wordprocessing.Text> texts =
        element.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().ToList();

    var joined = new StringBuilder();
    foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in texts)
    {
        joined.Append(text.Text);
    }

    List<int> matchStarts = AllIndexesOf(joined.ToString(), from);
    if (matchStarts.Count == 0)
    {
        return;
    }

    var multiLineTexts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();
    var multiLineValues = new List<string>();
    var emptiedTexts = new List<DocumentFormat.OpenXml.Wordprocessing.Text>();

    int matchIndex = 0; // first match that has not been fully consumed yet
    int position = 0;   // offset of the current character within the joined text

    foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in texts)
    {
        if (matchIndex >= matchStarts.Count)
        {
            break; // every match is handled; the remaining elements stay untouched
        }

        string original = text.Text ?? string.Empty;
        var rebuilt = new StringBuilder();
        bool touched = false;

        for (int i = 0; i < original.Length; i++, position++)
        {
            // Matches are sorted and do not overlap, and matchIndex advances as soon as a
            // match ends, so "at or past the start" means "inside the current match".
            bool insideMatch = matchIndex < matchStarts.Count && position >= matchStarts[matchIndex];
            if (!insideMatch)
            {
                // Also covers the characters that follow the last match in this element.
                rebuilt.Append(original[i]);
                continue;
            }

            touched = true;
            int matchStart = matchStarts[matchIndex];
            if (position == matchStart)
            {
                rebuilt.Append(to);
            }
            if (position == matchStart + from.Length - 1)
            {
                // Last character of the match (also its first one when from has length 1).
                matchIndex++;
            }
        }

        if (!touched)
        {
            continue; // no part of any match lives in this element
        }

        string newValue = rebuilt.ToString();
        if (newValue.IndexOf('\n') == -1)
        {
            SetTextValue(text, newValue);
        }
        else
        {
            multiLineTexts.Add(text);
            multiLineValues.Add(newValue);
        }
        if (newValue.Length == 0)
        {
            emptiedTexts.Add(text);
        }
    }

    // Line feeds: keep the first line in the existing run, then add one line-break run and
    // one text run per additional line, each cloned from the run so formatting is kept.
    for (int i = 0; i < multiLineTexts.Count; i++)
    {
        string[] lines = multiLineValues[i].Replace("\r", "").Split('\n');
        DocumentFormat.OpenXml.Wordprocessing.Text current = multiLineTexts[i];
        SetTextValue(current, lines[0]);

        for (int j = 1; j < lines.Length; j++)
        {
            DocumentFormat.OpenXml.OpenXmlElement run = current.Parent;

            DocumentFormat.OpenXml.OpenXmlElement lineRun = run.CloneNode(true);
            DocumentFormat.OpenXml.Wordprocessing.Text lineText =
                lineRun.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().First();
            SetTextValue(lineText, lines[j]);

            // <w:r><w:rPr>...</w:rPr><w:br /></w:r>
            DocumentFormat.OpenXml.OpenXmlElement breakRun = run.CloneNode(true);
            // ToList: children are removed while iterating, so the query must not be lazy.
            foreach (DocumentFormat.OpenXml.OpenXmlElement child in breakRun.ChildElements.Where(e => e.LocalName != "rPr").ToList())
            {
                child.Remove();
            }
            breakRun.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Break());

            // Both are inserted directly after the run, so the one inserted last ends up
            // first: run, breakRun, lineRun.
            run.InsertAfterSelf(lineRun);
            run.InsertAfterSelf(breakRun);
            current = lineText;
        }
    }

    // Drop emptied text elements; drop the parent as well only when nothing but run
    // properties remains, so other run content is kept.
    foreach (DocumentFormat.OpenXml.Wordprocessing.Text emptied in emptiedTexts)
    {
        DocumentFormat.OpenXml.OpenXmlElement run = emptied.Parent;
        if (run == null)
        {
            continue;
        }
        emptied.Remove();
        if (run.Parent != null && run.ChildElements.All(child => child.LocalName == "rPr"))
        {
            run.Remove();
        }
    }
}

/// <summary>
/// Assigns <paramref name="value"/> to a text element and marks the element as
/// whitespace-preserving when the value starts or ends with whitespace.
/// </summary>
/// <param name="text">The text element to update.</param>
/// <param name="value">The new text value.</param>
private static void SetTextValue(DocumentFormat.OpenXml.Wordprocessing.Text text, string value)
{
    text.Text = value;
    if (!string.IsNullOrEmpty(value)
        && (char.IsWhiteSpace(value[0]) || char.IsWhiteSpace(value[value.Length - 1])))
    {
        // xml:space="preserve" keeps leading/trailing spaces significant for consumers.
        text.Space = DocumentFormat.OpenXml.SpaceProcessingModeValues.Preserve;
    }
}
/// <summary>
/// Diagnostic variant of the text replacement: writes what was found to the console and
/// then replaces every occurrence of <paramref name="from"/> with <paramref name="to"/>
/// in the text beneath <paramref name="element"/>.
/// </summary>
/// <remarks>
/// Console output: the position of the first match followed by a dump of every character
/// of the concatenated text (only when there is a match), a "contains"/"not found" line,
/// and the outer XML of each text element that holds part of a match. The replacement is
/// delegated to ReplaceAllTexts so that both entry points share a single implementation.
/// </remarks>
/// <param name="element">The element whose descendant text elements are rewritten.</param>
/// <param name="from">The text to search for; must not be null or empty.</param>
/// <param name="to">The replacement text.</param>
public static void OpenXmlReplaceTextTest(DocumentFormat.OpenXml.OpenXmlElement element, string from, string to)
{
    List<DocumentFormat.OpenXml.Wordprocessing.Text> texts =
        element.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().ToList();

    var joined = new StringBuilder();
    foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in texts)
    {
        joined.Append(text.Text);
    }
    string joinedText = joined.ToString();
    List<int> matchStarts = AllIndexesOf(joinedText, from);

    if (matchStarts.Count != 0)
    {
        int firstEnd = matchStarts[0] + from.Length - 1;
        Console.WriteLine("from:'" + from + "' between " + matchStarts[0] + " to " + firstEnd);
        for (int i = 0; i < joinedText.Length; i++)
        {
            Console.WriteLine(" [" + i + "]: " + ( (int)joinedText[i] ) + " '" + joinedText[i] + "'");
        }
    }
    Console.WriteLine(joinedText.Contains(from) ? "contains" : "not found");

    // Report each text element that holds at least one character of a match.
    int offset = 0;
    foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in texts)
    {
        int length = (text.Text ?? string.Empty).Length;
        int first = offset;
        int last = offset + length - 1;
        if (length > 0 && matchStarts.Any(start => start <= last && start + from.Length - 1 >= first))
        {
            Console.WriteLine("#" + text.OuterXml);
        }
        offset += length;
    }

    // One replacement implementation for both entry points.
    element.ReplaceAllTexts(from, to);
}
/// <summary>
/// Checks whether the runtime type of an object is exactly the given type.
/// </summary>
/// <example>
/// <code>
/// bool isString = "text".IsEqualType(typeof(string)); // true
/// </code>
/// </example>
/// <typeparam name="T">The compile-time type of <paramref name="a"/>.</typeparam>
/// <param name="a">The object whose runtime type is inspected.</param>
/// <param name="type">The type to compare against.</param>
/// <returns>
/// true when the runtime type of <paramref name="a"/> equals <paramref name="type"/>;
/// otherwise false. A derived or base type does not count as equal.
/// </returns>
public static bool IsEqualType<T>(this T a, Type type)
{
    Type runtimeType = a.GetType();
    return runtimeType == type;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment