Skip to content

Instantly share code, notes, and snippets.

@mjsqu
Created July 10, 2020 05:40
Show Gist options
  • Save mjsqu/bc16cd0e186a669d2e47eb37fe74afa5 to your computer and use it in GitHub Desktop.
Save mjsqu/bc16cd0e186a669d2e47eb37fe74afa5 to your computer and use it in GitHub Desktop.
# Prints the paragraphs of a Word document (docfile) whose text matches a regex (rx).
#
# Parameters:
#   $docfile - path to the .docx file to search. It is copied to a temporary
#              .zip file and expanded with Expand-Archive; the original file
#              is not modified.
#   $rx      - regular expression tested with -match against the concatenated
#              text of each paragraph.
# Output:
#   One string per matching paragraph (the paragraph's full text), in document
#   order. Nothing is emitted when no paragraph matches.
# Errors:
#   A failure to copy, expand or parse the document is raised as a terminating
#   error; temporary files are removed and the progress preference is restored
#   either way.
Function PrintWord($docfile,$rx){
    # New-TemporaryFile is used only to reserve a unique name: the same base
    # name is reused for the .zip copy and for the extraction directory.
    $tempfile = New-TemporaryFile
    $tempzipfile = "{0}{1}" -f $tempfile.FullName, '.zip'
    $targetdir = Join-Path $tempfile.DirectoryName $tempfile.BaseName
    Remove-Item -LiteralPath $tempfile.FullName

    # Remember the caller's setting so it can be put back afterwards instead
    # of being forced to 'Continue'.
    $previousProgress = $global:ProgressPreference
    try {
        # Expand-Archive is handed a copy of the document named *.zip
        Copy-Item $docfile $tempzipfile -ErrorAction Stop

        # Silence the Expand-Archive progress bar while extracting
        $global:ProgressPreference = 'SilentlyContinue'
        Expand-Archive -LiteralPath $tempzipfile -DestinationPath $targetdir -ErrorAction Stop

        # Go down into word/document.xml
        $xmlfile = Join-Path (Join-Path $targetdir 'word') 'document.xml'

        # Load() lets the XML parser determine the file's encoding itself,
        # rather than depending on Get-Content's default text encoding.
        $document = New-Object System.Xml.XmlDocument
        $document.Load($xmlfile)
    }
    finally {
        # Tidy up on success and on failure alike
        $global:ProgressPreference = $previousProgress
        foreach ($leftover in @($tempzipfile, $targetdir)) {
            if (Test-Path -LiteralPath $leftover) {
                Remove-Item -LiteralPath $leftover -Recurse -Force
            }
        }
    }

    # document/body/p contains multiple r.t elements.
    # PowerShell hands back a plain string for a t element that has no
    # attributes, and an XmlElement for one that does (e.g. space="preserve").
    ForEach ($p in $document.document.body.p) {
        $tcomplete = ""
        # Loop over the text elements in the paragraph and append text
        ForEach ($t in $p.r.t) {
            if ($t -is [System.Xml.XmlElement]) {
                # InnerText also returns whitespace-only content, which is
                # not exposed through the '#text' property.
                $tcomplete += $t.InnerText
            }
            else {
                $tcomplete += $t
            }
        }
        # Match the complete paragraph text with the passed regex and emit it
        # straight to the output stream.
        if ($tcomplete -match $rx) {
            $tcomplete
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment