Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Initial works on a simple Ms Word templating engine, using Office Open XML SDK
open System;
open DocumentFormat.OpenXml;
open DocumentFormat.OpenXml.Packaging;
open DocumentFormat.OpenXml.Wordprocessing;
[<Literal>]
let openingBracket = "<#"
[<Literal>]
let closingBracket = "#>"
let indexOf (str:string) (value:string) startIndex =
match str.IndexOf(value, startIndex, StringComparison.Ordinal) with
| index when index >= 0 -> Some index
| _ -> None
type BracketMatch =
| Opening of index : int
| Full of openingIndex : int * closingIndex : int
type ExpressionLocation =
{
openingBracketIndex: int;
closingBracketIndex: int;
openingNode: Text;
closingNode: Text
}
type PartialExpression = { node: Text; index: int }
type FoundExpressions = { complete: ExpressionLocation list; opened: PartialExpression option }
type System.Text.StringBuilder with
member this.AppendMany(items: string list) =
items |> List.iter (fun str -> this.Append(str) |> ignore)
this
let fetchUntilNull fetcher entity =
let rec fetchRec current acc =
match fetcher current with
| null -> acc
| item -> item::acc |> fetchRec item
fetchRec entity [] |> List.rev
let allFollowingSiblings<'T when 'T :> OpenXmlElement and 'T : null> node = fetchUntilNull (fun (n:'T) -> n.NextSibling<'T>()) node
let allPreviousSiblings<'T when 'T :> OpenXmlElement and 'T : null> node = fetchUntilNull (fun (n:'T) -> n.PreviousSibling<'T>()) node
let rec followingSiblingsUntil<'T when 'T :> OpenXmlElement and 'T : null> (endNode:'T) (currentNode:'T) =
let rec innerFetcher (currentNode:'T) acc =
match currentNode.NextSibling<'T>() with
| null -> failwith "nodes are not siblings"
| sibling when Object.ReferenceEquals(sibling, endNode) -> acc
| sibling -> innerFetcher sibling (sibling::acc)
innerFetcher currentNode [] |> List.rev
let getTextNodesBetween (node1:Text) node2 =
match node1, node2 with
| (n1, n2) when n1 = n2 -> [] // text node level
| (n1, n2) when n1.Parent = n2.Parent -> followingSiblingsUntil n2 n1 |> List.rev // run level
| (n1, n2) when n1.Parent.Parent = n2.Parent.Parent -> // run's parent level
let cousins = followingSiblingsUntil n2.Parent n1.Parent |> List.collect (fun node -> node.Descendants<Text>() |> List.ofSeq)
(allFollowingSiblings n1) @ cousins @ (allPreviousSiblings n2)
| _ -> failwith "nodes have no common ancestor"
let getExpressionText exprLocation =
let { openingBracketIndex = startIndex; closingBracketIndex = endIndex; openingNode = startNode; closingNode = endNode } = exprLocation
let startNodeText = startNode.Text
if startNode = endNode then startNodeText.Substring(startIndex + openingBracket.Length, endIndex - startIndex - openingBracket.Length)
else
let builder = new System.Text.StringBuilder()
builder.Append(startNodeText.Substring(startIndex + openingBracket.Length))
.AppendMany(getTextNodesBetween startNode endNode |> List.map (fun txtNode -> txtNode.Text))
.Append(endNode.Text.Substring(0, endIndex)) |> ignore
builder.ToString()
let simpleMatcher startIndex (node:Text) =
let text = node.Text
let matchExpressionInString startIndex =
match indexOf text openingBracket startIndex with
| Some openingIndex ->
match indexOf text closingBracket (openingIndex + closingBracket.Length) with
| Some closingIndex -> Some (Full (openingIndex, closingIndex))
| None -> Some (Opening openingIndex)
| None -> None
let toExpressionLocation a b = { openingBracketIndex = a; closingBracketIndex = b; openingNode = node; closingNode = node }
let rec matchMany index acc =
match matchExpressionInString index with
| Some matchType ->
let accList = fst acc;
match matchType with
| Opening index -> accList, Some index
| Full (opening, closing) -> ((toExpressionLocation opening closing)::accList, None) |> matchMany (closing + closingBracket.Length)
| None -> acc
match matchMany startIndex ([], None) with
| (list, index) -> { complete = list; opened = Option.map (fun i -> { node = node; index = i}) index }
let completingMatcher (partial:PartialExpression) (currentNode:Text) =
let matchClosing (startNode:Text) (currentNode:Text) =
let haveSameAncestor (node1:Text) node2 =
node1 = node2 // text node
|| node1.Parent = node2.Parent // run node
|| node1.Parent.Parent = node2.Parent.Parent // run's parent node
match indexOf currentNode.Text closingBracket 0 with
| Some index -> if haveSameAncestor startNode currentNode then Some index else None
| None -> None
match matchClosing partial.node currentNode with
| Some closingIndex ->
let completedExpression =
{
openingBracketIndex = partial.index; openingNode = partial.node;
closingBracketIndex = closingIndex; closingNode = currentNode
}
let remainingExpressions = simpleMatcher (closingIndex + closingBracket.Length) currentNode
{ complete = completedExpression::remainingExpressions.complete; opened = remainingExpressions.opened }
| None -> { complete = []; opened = Some partial}
let matchExpressions (textNodes: Text list) =
let rec matchRec matcher (elements: Text list) acc =
match elements with
| [] -> acc
| node::rest ->
let { complete = completeExpressions; opened = openedExpression } = matcher node
match openedExpression with
| Some opened -> matchRec (completingMatcher opened) rest completeExpressions@acc
| None -> matchRec (simpleMatcher 0) rest completeExpressions@acc
matchRec (simpleMatcher 0) textNodes []
let doJob () =
use wordDoc = WordprocessingDocument.Open(@"G:\Var\kwiczolek.docx", true)
let textElements = wordDoc.MainDocumentPart.Document.Body.Descendants<Text>() |> List.ofSeq
let matchedExpressions = matchExpressions textElements
let puup = matchedExpressions |> List.map (fun expr -> getExpressionText expr)
()
[<EntryPoint>]
let main argv =
doJob()
printfn "%A" argv
0 // return an integer exit code
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.