Skip to content

Instantly share code, notes, and snippets.

@scriptingstudio
Last active February 23, 2024 05:56
Show Gist options
  • Save scriptingstudio/e6eec28715fc92afc2a4ad967b879a8e to your computer and use it in GitHub Desktop.
Save scriptingstudio/e6eec28715fc92afc2a4ad967b879a8e to your computer and use it in GitHub Desktop.
Simple PDF Reader
# https://www.nuget.org/packages/itextsharp/ (5.5.13.3)
# https://github.com/itext/itextsharp/releases/tag/5.5.13
Add-Type -Path "$psscriptroot\itextsharp.dll" # 5.5.13 and lower have no dependencies
function Read-Pdf {
    <#
    .SYNOPSIS
    Extracts text from PDF files (or an already opened iTextSharp PdfReader).

    .DESCRIPTION
    For every input file the function opens an iTextSharp PdfReader, extracts the
    text of the selected pages with PdfTextExtractor and splits it into lines on LF.
    Readers opened by this function are closed again, even when extraction fails;
    a reader supplied through -instance (or returned by -passThru) is left open
    because the caller owns it.

    .PARAMETER path
    One or more file paths. Accepts pipeline input (strings or FileInfo objects;
    DirectoryInfo objects are skipped). Wildcards in string paths are expanded.

    .PARAMETER instance
    An existing PdfReader to read from instead of a file.

    .PARAMETER page
    Page numbers to extract. Numbers outside the document's page range are ignored.
    When omitted, all pages are extracted.

    .PARAMETER raw
    Return the content as a single string (via Out-String) instead of lines.

    .PARAMETER pattern
    Regular expression(s); only lines matching (Select-String) are returned.

    .PARAMETER passThru
    Return the PdfReader object itself instead of text. The reader is not closed.

    .PARAMETER enumerate
    Emit one object per file/instance with the properties File and Content.

    .OUTPUTS
    String lines, a single string (-raw), PSCustomObject (-enumerate),
    or PdfReader (-passThru).
    #>
    [CmdLetBinding(DefaultParameterSetName="Path")]
    param (
        [Parameter(ParameterSetName="Path", Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName, Position=0)]
        [alias('fullname')][string[]]$path,
        [Parameter(ParameterSetName="Instance", Position=0)]
        [iTextSharp.text.pdf.pdfreader]$instance,
        [int[]]$page, # page selector by number
        [switch]$raw, # return content as a single string
        [alias('filter')][string[]]$pattern, # content filter; the value is treated as a regular expression
        [switch]$passThru, # return PDF object
        [switch]$enumerate # separate output by file/instance
    )
    end {
        # In the end block $input holds everything that arrived through the pipeline.
        $paths = if ($input) {$input} elseif ($path) {$path} elseif ($instance) {$instance}
        foreach ($item in $paths) {
            # Work out what has to be read for this input item:
            # the supplied reader, or every file the path resolves to.
            $targets = if ($instance) {$instance}
            else {
                if ($item -is [System.IO.DirectoryInfo]) {continue}
                if ($item -is [System.IO.FileInfo]) {
                    # A FileInfo names exactly one file; resolve it literally so that
                    # characters such as [ ] are not treated as wildcards.
                    $requested = $item.FullName
                    $resolved = @(Resolve-Path -LiteralPath $requested -ErrorAction SilentlyContinue | ForEach-Object {$_.ProviderPath})
                } else {
                    $requested = $item
                    $resolved = @(Resolve-Path -Path $requested -ErrorAction SilentlyContinue | ForEach-Object {$_.ProviderPath})
                }
                if (-not $resolved) {
                    # Report the path the caller asked for, not the (empty) resolve result.
                    Write-Warning "File '$requested' not found."
                    continue
                }
                # Directories matched by a wildcard are skipped silently.
                $resolved | Where-Object {Test-Path -LiteralPath $_ -PathType Leaf}
            }

            foreach ($target in $targets) {
                $pdf = $null # never reuse the reader of a previous iteration
                $file = $null
                if ($instance) {
                    $pdf = $target
                } else {
                    $file = $target
                    try {
                        #[iTextSharp.text.pdf.pdfreader]::new($file)
                        $pdf = [iTextSharp.text.pdf.pdfreader]::new([iTextSharp.text.pdf.RandomAccessFileOrArray]::new($file,$true),$null)
                    } catch {
                        Write-Error "Cannot open '$file' as PDF: $($_.Exception.Message)"
                        continue
                    }
                }

                # The caller takes ownership of the reader, so it must stay open.
                if ($passThru) {$pdf; continue}

                try {
                    $pageCount = $pdf.NumberOfPages
                    $range = @()
                    if ($pageCount -ge 1) {$range = 1..$pageCount} # 1..0 would count down to 0
                    if ($page) {$range = ($page.where{$_ -in $range})}

                    $text = foreach ($p in $range) {
                        $lines = [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($pdf,$p).Split([char]0x000A)
                        if ($pattern) {
                            [string[]]($lines | Select-String -pattern $pattern)
                        } else {
                            $lines
                        }
                    }

                    if ($enumerate) {
                        [pscustomobject]@{
                            File = if ($instance) {''} else {$file}
                            Content = if ($raw) {$text | Out-String} else {$text}
                        }
                    } elseif ($raw) {$text | Out-String} else {$text}
                } finally {
                    # Release readers opened here even if extraction threw or the
                    # downstream pipeline stopped early.
                    if (-not $instance -and $pdf) {$pdf.Close(); $pdf.Dispose()}
                }
            }
        }
    }
} # END Read-Pdf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment