Skip to content

Instantly share code, notes, and snippets.

@tobiasviehweger
Created May 21, 2015 10:03
Show Gist options
  • Save tobiasviehweger/e13c858c57e0a5965471 to your computer and use it in GitHub Desktop.
Save tobiasviehweger/e13c858c57e0a5965471 to your computer and use it in GitHub Desktop.
Parsing Outlook WordOpenXML to OOXML SDK objects
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Packaging;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.XPath;
namespace OOXMLTest
{
/// <summary>
/// Console demo: reads a Flat OPC ("WordOpenXML") string from <c>source.xml</c>,
/// rebuilds it as an in-memory package, opens it with the Open XML SDK and prints
/// every body paragraph together with its style id and basic run formatting.
/// </summary>
class Program
{
    // Namespace of the Flat OPC wrapper elements (pkg:part, pkg:xmlData, pkg:binaryData).
    private const string PackageXmlns = "http://schemas.microsoft.com/office/2006/xmlPackage";

    /// <summary>
    /// Entry point. Dumps the paragraphs of <c>source.xml</c> to the console and
    /// then waits for input so the console window stays open.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var wordXml = File.ReadAllText("source.xml");

        // Dispose the document when done; the original never released it.
        using (var doc = WordprocessingDocument.Open(createPackageFromWordOpenXML(wordXml)))
        {
            var mainPart = doc.MainDocumentPart;
            var body = (mainPart != null && mainPart.Document != null) ? mainPart.Document.Body : null;
            if (body == null)
            {
                Console.Error.WriteLine("The package does not contain a main document body.");
            }
            else
            {
                // The styles part is optional; paragraphs are still dumped without it.
                var stylesPart = mainPart.StyleDefinitionsPart;
                var styles = stylesPart != null ? stylesPart.Styles : null;

                foreach (var child in body.ChildElements)
                {
                    // Type check instead of LocalName == "p" so the cast can never yield null.
                    var para = child as Paragraph;
                    if (para != null)
                    {
                        DumpParagraph(para, styles);
                    }
                }
            }
        }

        Console.In.Read();
    }

    /// <summary>
    /// Prints one paragraph: its style id (when it resolves against
    /// <paramref name="styles"/>) followed by each child element.
    /// </summary>
    /// <param name="para">Paragraph to print.</param>
    /// <param name="styles">Style definitions of the document, or null when there are none.</param>
    private static void DumpParagraph(Paragraph para, Styles styles)
    {
        Console.WriteLine("p -------------------------");

        var props = para.ParagraphProperties;
        if (styles != null && props != null
            && props.ParagraphStyleId != null && props.ParagraphStyleId.Val != null)
        {
            string styleId = props.ParagraphStyleId.Val.Value;
            var style = styles.Elements<Style>().FirstOrDefault(s => s.StyleId == styleId);
            if (style != null)
            {
                // The original also read style.StyleRunProperties.FontSize into an unused
                // local, which threw for styles without run properties; that read is dropped.
                Console.WriteLine("Style: " + style.StyleId);
            }
        }

        foreach (var element in para.ChildElements)
        {
            Console.WriteLine(" " + element.LocalName + " ---------------------");

            var run = element as Run;
            // Runs without a w:rPr child have RunProperties == null.
            if (run != null && run.RunProperties != null)
            {
                DumpRunFormatting(run.RunProperties);
            }

            Console.WriteLine(element.InnerText);
            Console.WriteLine(" /" + element.LocalName + " ---------------------");
        }

        Console.WriteLine("/p -------------------------");
    }

    /// <summary>
    /// Prints which of bold / underline / italic are present on a run.
    /// Only the presence of the element is tested; its value is not inspected.
    /// </summary>
    /// <param name="runProps">Run properties to inspect; must not be null.</param>
    private static void DumpRunFormatting(RunProperties runProps)
    {
        if (runProps.Bold != null)
        {
            Console.WriteLine("Bold");
        }
        if (runProps.Underline != null)
        {
            Console.WriteLine("Underline");
        }
        if (runProps.Italic != null)
        {
            Console.WriteLine("Italic");
        }
    }

    /// <summary>
    /// Builds an in-memory package from a Flat OPC XML string by creating one
    /// package part per <c>pkg:part</c> element.
    /// </summary>
    /// <param name="wordOpenXML">Flat OPC XML text.</param>
    /// <returns>An open package backed by a <see cref="MemoryStream"/>; the caller owns it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="wordOpenXML"/> is null.</exception>
    /// <remarks>
    /// Parsing and packaging errors are no longer swallowed: the half-built package
    /// is closed and the original exception is rethrown.
    /// </remarks>
    private static Package createPackageFromWordOpenXML(string wordOpenXML)
    {
        if (wordOpenXML == null)
        {
            throw new ArgumentNullException("wordOpenXML");
        }

        MemoryStream stream = new MemoryStream();
        Package newPkg = Package.Open(stream, FileMode.Create);
        try
        {
            XPathDocument xpDocument = new XPathDocument(new StringReader(wordOpenXML));
            XPathNavigator xpNavigator = xpDocument.CreateNavigator();
            XmlNamespaceManager nsManager = new XmlNamespaceManager(xpNavigator.NameTable);
            nsManager.AddNamespace("pkg", PackageXmlns);

            XPathNodeIterator xpIterator = xpNavigator.Select("//pkg:part", nsManager);
            while (xpIterator.MoveNext())
            {
                XPathNavigator partNode = xpIterator.Current;
                Uri partUri = new Uri(partNode.GetAttribute("name", PackageXmlns), UriKind.Relative);
                PackagePart pkgPart = newPkg.CreatePart(partUri, partNode.GetAttribute("contentType", PackageXmlns));

                byte[] buffer = ReadPartContent(partNode, nsManager);
                // Close each part stream as soon as it is written.
                using (Stream partStream = pkgPart.GetStream())
                {
                    partStream.Write(buffer, 0, buffer.Length);
                }
            }
            newPkg.Flush();
        }
        catch
        {
            // Do not hand a partially populated package back to the caller.
            newPkg.Close();
            stream.Dispose();
            throw;
        }
        return newPkg;
    }

    /// <summary>
    /// Extracts the payload bytes of a single <c>pkg:part</c> element.
    /// </summary>
    /// <param name="partNode">Navigator positioned on the <c>pkg:part</c> element.</param>
    /// <param name="nsManager">Namespace manager that maps the <c>pkg</c> prefix.</param>
    /// <returns>
    /// UTF-8 bytes of the XML inside <c>pkg:xmlData</c>, the decoded base64 content of
    /// <c>pkg:binaryData</c>, or the part's raw inner XML when neither wrapper is present.
    /// </returns>
    private static byte[] ReadPartContent(XPathNavigator partNode, XmlNamespaceManager nsManager)
    {
        // Selecting the wrapper element (instead of string-replacing its start tag)
        // keeps working when pkg:xmlData carries attributes.
        XPathNavigator xmlData = partNode.SelectSingleNode("pkg:xmlData", nsManager);
        if (xmlData != null)
        {
            return Encoding.UTF8.GetBytes(xmlData.InnerXml);
        }

        // Binary parts are stored as base64 text and must be decoded, not copied as text.
        XPathNavigator binaryData = partNode.SelectSingleNode("pkg:binaryData", nsManager);
        if (binaryData != null)
        {
            return Convert.FromBase64String(binaryData.Value);
        }

        return Encoding.UTF8.GetBytes(partNode.InnerXml);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment