Skip to content

Instantly share code, notes, and snippets.

@klpn
Last active Jul 30, 2018
Embed
What would you like to do?
Word to PDF/A conversion adapted for Archivematica
#!/bin/sh
# Copy a Word document into the conversion directory, wait until the matching
# PDF shows up there, and move that PDF into the transfer's
# manualNormalization/preservation directory.
#
# Usage: ./amtr_word2pdf.sh <transfer-dir>/<worddoc>
#
# Example use: after the watcher defined in fsw_word2pdf.ps1 has been started in a graphical Windows session,
# and with the file worddoc.docx present in ~/Projekt/ABM/amcommands/amtrans, run from
# the amcommands directory, ./amtr_word2pdf.sh amtrans/worddoc.docx.

# Directory (relative to the current directory) watched by the converter.
conv=wordconv

if [ "$#" -ne 1 ] || [ ! -f "$1" ]; then
  printf 'Usage: %s path/to/worddoc\n' "${0##*/}" >&2
  exit 2
fi
# Without this check, cp would create a plain file named "$conv" and the
# wait loop below would never finish.
if [ ! -d "$conv" ]; then
  printf '%s: conversion directory "%s" not found\n' "${0##*/}" "$conv" >&2
  exit 1
fi
# A missing inotifywait would turn the wait loop into a busy loop.
if ! command -v inotifywait >/dev/null 2>&1; then
  printf '%s: inotifywait is required\n' "${0##*/}" >&2
  exit 1
fi

base=${1##*/}
trans=$(dirname -- "$1")
pres="${trans}/manualNormalization/preservation"
mkdir -p -- "$pres" || exit 1
cp -- "$1" "$conv" || exit 1

# The PDF is expected under the document's base name with a .pdf extension.
pdf="${conv}/${base%.*}.pdf"
while [ ! -f "$pdf" ]; do
  # Wake up on a create/moved_to event or after 2 seconds, then re-check.
  # inotifywait exits 0 on an event and 2 on timeout; anything else is an error.
  inotifywait -qqt 2 -e create -e moved_to "$conv"
  status=$?
  if [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then
    printf '%s: inotifywait failed with status %s\n' "${0##*/}" "$status" >&2
    exit 1
  fi
done
sync
rm -- "${conv}/${base}"
mv -- "$pdf" "$pres"
# Based on: https://gallery.technet.microsoft.com/scriptcenter/Powershell-FileSystemWatche-dfd7084b
# Registers a file system watcher on $folder and runs ConvertTo-WordPDFA on
# every newly created file that matches $filter.
$folder = 'Z:\Projekt\ABM\amcommands\wordconv' # In this example, Z: is mountpoint for a Samba share on the Linux system.
$filter = '*.doc*' # Only trigger event on files with a Word extension.
Import-Module 'Z:\Projekt\ABM\amcommands\WordToPDFA.psm1'

$fsw = New-Object -TypeName IO.FileSystemWatcher -ArgumentList $folder, $filter -Property @{
    IncludeSubdirectories = $false
    NotifyFilter          = [IO.NotifyFilters]'FileName, LastWrite'
}

Register-ObjectEvent -InputObject $fsw -EventName Created -SourceIdentifier FileCreated -Action {
    $created  = $Event.SourceEventArgs
    $fileName = $created.Name
    $fullName = $created.FullPath
    $kind     = $created.ChangeType
    $when     = $Event.TimeGenerated

    # Do not try to convert temporary files created by Word.
    If ($fileName -like "~$*") { return }

    # The same message goes to the console and to the log file.
    $message = "The file '$fileName' was $kind at $when"
    Write-Host $message -ForegroundColor green
    Out-File -FilePath Z:\Projekt\ABM\amcommands\outlog.txt -Append -InputObject $message
    ConvertTo-WordPDFA $fullName
}
# To stop the monitoring, run the following commands:
# Unregister-Event FileCreated
function ConvertTo-WordPDFA
{
<#
.SYNOPSIS
Exports a Word document to a PDF/A file placed next to it.

.DESCRIPTION
Opens the document in a new Word instance and exports it to
"<basename>_tmp.pdf" in the document's directory. After Word has been shut
down, the temporary file is renamed to "<basename>.pdf", so the final name
only appears once the export has finished. Word is always shut down and its
COM objects released, even when opening or exporting fails.

.PARAMETER dpath
Full path of the Word document to convert.
#>
Param(
[Parameter(Mandatory=$true,Position=0)][String]$dpath
)
$dpath_dir = [System.IO.Path]::GetDirectoryName($dpath)
$dpath_base = [System.IO.Path]::GetFileNameWithoutExtension($dpath)
$ppath = [System.IO.Path]::Combine($dpath_dir, $dpath_base + ".pdf")
$ppath_tmp = [System.IO.Path]::Combine($dpath_dir, $dpath_base + "_tmp.pdf")

$word_app = New-Object -ComObject Word.Application
$doc = $null
$exported = $false
try
{
    $doc = $word_app.Documents.Open($dpath)
    # Positional arguments, per the Word object model documentation:
    # 17 = PDF format, no open after export, 0 = optimize for print,
    # 0 = whole document (From/To unused), 7 = document with markup,
    # include document properties, no IRM, 2 = bookmarks from Word bookmarks,
    # structure tags, bitmap missing fonts, ISO 19005-1 (PDF/A) compliance.
    $doc.ExportAsFixedFormat($ppath_tmp, 17, $False, 0, 0, 0, 0, 7, $True, $False, 2, $True, $True, $True)
    $exported = $true
}
finally
{
    if ($null -ne $doc)
    {
        # Close without saving; a failure here must not prevent Word from quitting.
        try { $doc.Close($False) } catch { Write-Warning "Could not close '$dpath': $_" }
        [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($doc)
    }
    $word_app.Quit()
    # Release the COM wrapper so the Word process does not linger after Quit().
    [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($word_app)
}
if ($exported)
{
    # -LiteralPath keeps characters such as [ and ] in file names from being
    # interpreted as wildcards.
    Move-Item -LiteralPath $ppath_tmp -Destination $ppath
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment