Skip to content

Instantly share code, notes, and snippets.

@IMJLA
Created January 27, 2023 16:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IMJLA/b072aad3b534cdf6967dea61fc19d4cc to your computer and use it in GitHub Desktop.
Save IMJLA/b072aad3b534cdf6967dea61fc19d4cc to your computer and use it in GitHub Desktop.
param (
[string]$TextQualifier = $null
#[string]$TextQualifier = 'None'
#[string]$TextQualifier = (0xFE -as [char]) #TEMPORARY HARD-CODED BS. REMOVE LATER
)
[System.Reflection.Assembly]::LoadWithPartialName("System.windows.forms") | Out-Null
Do {
$OpenFileDialog = New-Object System.Windows.Forms.OpenFileDialog
#$OpenFileDialog.Filter = "DAT (*.dat)| *.dat"
$OpenFileDialog.Filter = "CSV files (*.csv)| *.csv"
$OpenFileDialog.ShowDialog() | Out-Null
$OldFile = $OpenFileDialog.filename
} while ($OldFile -eq '')
Write-Host "Repair-LoadFile`tFile:`t$OldFile" -ForegroundColor Cyan
$NewFile = ("$OldFile.new")
$RepairOptions = @(
'Remove decimal points from columns that Clearwell requires to be integers (such as file size)',
'Convert a date to format MM/dd/yyyy (adding leading 0s as necessary for two-digit months and days)'
'Convert a DateTime to format MM/dd/yyyy hh:mm tt'
'Blank out a column (replace all values with blank values)'
'Delete a column'
'Add text qualifiers'
'Add columns with blank values (Clearwell minimum field requirements: BATESNUM/RCRDTYPE/FILENAME/DOCEXT)'
'Add columns to the load file from a separate CSV file'
'Add an ExtractedText column for a single folder of Bates-numbered text'
'Combine a folder path and a filename into a single column'
)
Do {
$RepairOption = $RepairOptions | Out-GridView -Title "What repairs do you want to make?" -PassThru
} while ($RepairOption -eq $null)
Write-Host "Repair-LoadFile`tRepairs to make:`t$RepairOption" -ForegroundColor Cyan
function Get-FileEncoding {
[CmdletBinding()] Param (
[Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
[String]$Path
)
[Byte[]]$Bytes = Get-Content -Path $Path -Encoding byte -ReadCount 4 -TotalCount 4
if ( $Bytes[0] -eq 0xef -and $Bytes[1] -eq 0xbb -and $Bytes[2] -eq 0xbf ) {
$Encoding = [System.Text.Encoding]::UTF8
}
elseif ($Bytes[0] -eq 0xfe -and $Bytes[1] -eq 0xff) {
$Encoding = [System.Text.Encoding]::Unicode
}
elseif ($Bytes[0] -eq 0 -and $Bytes[1] -eq 0 -and $Bytes[2] -eq 0xfe -and $Bytes[3] -eq 0xff) {
$Encoding = [System.Text.Encoding]::UTF32
}
elseif ($Bytes[0] -eq 0x2b -and $Bytes[1] -eq 0x2f -and $Bytes[2] -eq 0x76) {
$Encoding = [System.Text.Encoding]::UTF7
}
else{
#Supported by .Net file streams
$SupportedEncodings = [System.Text.Encoding]::GetEncodings() | Sort DisplayName
#Supported by Get-Content
#$SupportedEncodings = [System.Enum]::GetNames([Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding])
#$SampleText = Get-Content $File -Encoding $CurrentEncoding.Name
$EncodingSamples = ForEach ($CurrentEncoding in $SupportedEncodings) {
$CodePage = $CurrentEncoding.CodePage
$StreamReader = New-Object -TypeName System.IO.StreamReader($Path,[System.Text.Encoding]::GetEncoding($CodePage))
$SampleLine = $StreamReader.ReadLine()
$SampleText = $SampleLine.Substring(0,[math]::min(75,$SampleLine.Length))
$StreamReader.Close()
$StreamReader = $null
[PSCustomObject]@{
Encoding = $CurrentEncoding
SampleText = $SampleText
}
}
$EncodingOptions = $EncodingSamples | Select-Object -Property @{
Label='DisplayName'
Expression={$_.Encoding.DisplayName}
},@{
Label='Name'
Expression={$_.Encoding.Name}
},SampleText
$SelectedEncoding = $EncodingOptions | Out-GridView -Title "Select the encoding that matches the file" -PassThru
$Encoding = $SupportedEncodings | Where-Object {$_.Name -eq $SelectedEncoding.Name}
}
Write-Output $Encoding
}
$SelectedEncoding = Get-FileEncoding -Path $OldFile
Write-Host "Repair-LoadFile`tEncoding:`t$($SelectedEncoding.Name)." -ForegroundColor Cyan
$StreamReader = New-Object -TypeName System.IO.StreamReader($OldFile,[System.Text.Encoding]::GetEncoding($SelectedEncoding.CodePage))
$StreamWriter = New-Object -TypeName System.IO.StreamWriter($NewFile,[System.Text.Encoding]::GetEncoding($SelectedEncoding.CodePage))
$OldFileInfo = New-Object -TypeName System.IO.FileInfo($OldFile)
$FirstLine = $StreamReader.ReadLine()
#The 1st character is the text qualifier.
if ($TextQualifier -ne 'None') {
$TextQualifier = $FirstLine.Substring(0,1)
$TextQualifierDecimal = [int][char]$TextQualifier
$TextQualifierHex = "{0:x}" -f $TextQualifierDecimal
#After the first 2 occurrences of the Text Qualifier, the next character is the column delimiter.
$FirstOccurrence = $FirstLine.IndexOf($TextQualifier)
$SecondOccurrence = $FirstLine.IndexOf($TextQualifier,$FirstOccurrence+1)
$Delimiter = $FirstLine.Substring($SecondOccurrence+1,1)
}
else {
$Delimiter = ','
}
Write-Host "Repair-LoadFile`tText Qualifier:`t$TextQualifier`t(ASCII Decimal: $TextQualifierDecimal)`t(ASCII Hex: $TextQualifierHex)" -ForegroundColor Cyan
#$Delimiter = (0x14 -as [char]) #TEMPORARY HARD-CODED BS. REMOVE LATER
$DelimiterDecimal = [int][char]$Delimiter
$DelimiterHex = "{0:x}" -f $DelimiterDecimal
Write-Host "Repair-LoadFile`tDelimiter:`t$Delimiter`t(ASCII Decimal: $DelimiterDecimal)`t(ASCII Hex: $DelimiterHex))" -ForegroundColor Cyan
[int64]$BytesComplete = $FirstLine.Length
[int64]$PercentComplete = $BytesComplete / $($OldFileInfo.Length) * 100
Function Add-Column {
param (
[String]$OldLine,
$Delimiter,
$TextQualifier,
[String[]]$NewColumn
)
begin{
[String]$NewLine = $OldLine
}
process{
ForEach ($Column in $NewColumn){
$NewLine = $NewLine + $Delimiter + $TextQualifier + $Column + $TextQualifier
}
}
end{
Write-Output $NewLine
}
}
Function Import-CsvWithMysteryHeaders {
Do {
$OpenFileDialog = New-Object System.Windows.Forms.OpenFileDialog
$OpenFileDialog.ShowDialog() | Out-Null
$CsvFile = $OpenFileDialog.filename
} while ($CsvFile -eq '')
Write-Host "Repair-LoadFile`tCSV File:`t$CsvFile" -ForegroundColor Cyan
$CsvFirstLine = Get-Content $CsvFile -First 1
$CsvFirstLineFields = $CsvFirstLine -split ','
$UserChoice = [System.Windows.MessageBox]::Show("Use the column headers below? Click 'No' to specify different headers`r`n`r`n$CsvFirstLineFields",'Select Column Headers','YesNo','Question')
if ($UserChoice -eq 'No'){
do {
[System.Reflection.Assembly]::LoadWithPartialName('Microsoft.VisualBasic') | Out-Null
$CsvHeaders = [Microsoft.VisualBasic.Interaction]::InputBox("Enter $($CsvFirstLineFields.Count) comma-delimited headers to use", "Headers", "$CsvFirstLine") -split ','
} while ($CsvHeaders.Count -ne $CsvFirstLineFields.Count)
}else {
$CsvHeaders = $CsvFirstLineFields
}
Import-Csv $CsvFile -Header $CsvHeaders
}
Function Remove-CollectionItemByIndex {
param ($Collection,$Index)
$currentIndex = 0
ForEach ($Item in $Collection) {
if (($Index -contains $currentIndex) -or ($Index -eq $currentIndex)) {
#Write-Debug "Deleting column $currentIndex"
}
else {
Write-Output $Item
}
$currentIndex++
}
}
Function Select-Column {
param (
$Columns,
[string]$Title = "Select column to repair"
)
$CustomColumnObjects = For ($i = 0; $i -lt $ColumnCount; $i++){
New-Object -TypeName PSCustomObject -Property @{
Text = $Columns[$i]
ColumnIndex = $i
}
}
do {
$SelectedColumn = $CustomColumnObjects | Out-GridView -Title $Title -PassThru
} while ($SelectedColumn -eq $null)
Write-Output $SelectedColumn
}
Function Get-ExtractedTextRelativePath {
$FolderPath = "$($OldFile | Split-Path -Parent)\text"
if (!(Test-Path $FolderPath)){
Do {
$FolderBrowserDialog = New-Object System.Windows.Forms.FolderBrowserDialog
$FolderBrowserDialog.ShowDialog() | Out-Null
$SelectedPath = $FolderBrowserDialog.SelectedPath
} while ($SelectedPath -eq '')
$FolderPath = $SelectedPath
}
Write-Host "Repair-LoadFile`tExtractedText folder:`t$FolderPath" -ForegroundColor Cyan
$FileNames = (Get-ChildItem -Path $FolderPath).Name
$Leaf = $Path | Split-Path -Leaf
$Prefix = "\$Leaf\"
ForEach ($FileName in $FileNames) {
Write-Output "$Prefix$FileName"
}
}
function Split-LineRemoveQualifier {
param (
$OldLine = '"aagwrapper.dll","10.0.14393.4046","28-Oct-2020","21:47","67,072"',
$Delimiter = ',',
$TextQualifier = '"'
)
if ($TextQualifier) {
$NewString = $OldLine.Substring(1,$OldLine.Length-1)
do {
$NextQualifier = $NewString.IndexOf($TextQualifier)
$NewString.Substring(0,$NextQualifier)
if ($NextQualifier -ne $NewString.Length -1) {
$NewString = $NewString.Substring($NextQualifier + 3,($NewString.Length - 3 - $NextQualifier))
}
}
until ($NextQualifier -eq $NewString.Length -1)
}
else {
$OldLine -split $Delimiter
}
}
function Split-Line {
param (
$Line,
$Delimiter = ',',
$TextQualifier = '"'
)
if ($TextQualifier) {
$NewString = $Line
while ($NewString.Length -gt 0) {
# The first .Substring is to skip the opening text qualifier (first char)
# The .IndexOf is to find the next text qualifier
$NextQualifier = $NewString.Substring( 1, $NewString.Length - 1).IndexOf($TextQualifier) + 1
if ($NextQualifier -eq ($NewString.Length - 1)) {
$NewString
break
}
else {
$NewString.Substring(0,$NextQualifier + 1)
$NewString = $NewString.Substring($NextQualifier + 2,($NewString.Length - 2 - $NextQualifier))
}
}
}
else {
$Line -split $Delimiter
}
}
#Split the line into columns
#$Split = $FirstLine -split $Delimiter
$Split = Split-Line -Line $FirstLine -Delimiter $Delimiter -TextQualifier $TextQualifier
#Count the number of columns
$ColumnCount = $Split.Count
Write-Host "Repair-LoadFile`tColumns: $ColumnCount" -ForegroundColor Cyan
# This Switch statement handles any operations that involve adding or removing columns (for the first line only)
Switch ($RepairOption){
'Add an ExtractedText column for a single folder of Bates-numbered text' {
$CsvContent = Get-ExtractedTextRelativePath
$CsvHeaders = 'ExtractedText'
$AddColumnParams = @{
OldLine = $FirstLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = $CsvHeaders
}
$FirstLine = Add-Column @AddColumnParams
}
'Add columns with blank values (Clearwell minimum field requirements: BATESNUM/RCRDTYPE/FILENAME/DOCEXT)'{
$AddColumnParams = @{
OldLine = $FirstLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = @('BATESNUM','RCRDTYPE','FILENAME','DOCEXT')
}
$FirstLine = Add-Column @AddColumnParams
}
'Add columns to the load file from a separate CSV file' {
$CsvContent = Import-CsvWithMysteryHeaders
$CsvHeaders = ($CsvContent | Get-Member -MemberType NoteProperty).Name
$AddColumnParams = @{
OldLine = $FirstLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = $CsvHeaders
}
$FirstLine = Add-Column @AddColumnParams
}
'Delete a column' {
$SelectedColumn = Select-Column -Columns $Split
ForEach ($Col in $SelectedColumn){
$Split = Remove-CollectionItemByIndex -Collection $Split -Index $Col.ColumnIndex
}
$FirstLine = $Split -join $Delimiter
}
'Combine a folder path and a filename into a single column' {
$SelectedColumn = Select-Column -Columns $Split -Title "Select the column with the folder path (only select 1)"
$FileNameColumn = Select-Column -Columns $Split -Title "Select the column with the file name"
$OldColumnValue = $Split[$SelectedColumn.ColumnIndex].TrimEnd($TextQualifier)
$NewColumnValue = "$OldColumnValue`_$($Split[$FileNameColumn.ColumnIndex].TrimStart($TextQualifier))"
$Split[$SelectedColumn.ColumnIndex] = $NewColumnValue
$Split = Remove-CollectionItemByIndex -Collection $Split -Index $FileNameColumn.ColumnIndex
$FirstLine = $Split -join $Delimiter
}
default {
$SelectedColumn = Select-Column -Columns $Split
}
}
$StartTime = Get-Date
$StreamWriter.WriteLine($FirstLine)
$CurrentLineNumber = 1
While (($OldLine = $StreamReader.ReadLine()) -ne $null) {
$CurrentLineNumber++
#Estimate progress
$BytesComplete += $OldLine.Length
$PercentComplete = $BytesComplete / $($OldFileInfo.Length) * 100
$CurrentTime = Get-Date
$Elapsed = $CurrentTime - $StartTime
$EstimatedTotalSeconds = 100 / ($PercentComplete+1) * $($Elapsed.TotalSeconds)
$Remaining = $EstimatedTotalSeconds - $Elapsed.TotalSeconds
$ETC = $CurrentTime.AddSeconds($Remaining)
$Activity = "Reading $OldFile"
$Status = "Read $BytesComplete of $($OldFileInfo.Length) bytes ($PercentComplete%) "
$CurrentOp = "Started $(Get-Date $StartTime -Format s)`tElapsed: $Elapsed`tCompletion at: $(Get-Date $ETC -Format s)"
Write-Progress -Activity $Activity -Status $Status -PercentComplete $PercentComplete -SecondsRemaining $Remaining -CurrentOperation $CurrentOp
#$Split = $OldLine -split $Delimiter
$Split = Split-Line -Line $OldLine -Delimiter $Delimiter -TextQualifier $TextQualifier
if ($Split.Count -ne $ColumnCount) {
Write-Warning "Line $($CurrentLineNumber) has $($Split.Count) fields instead of $ColumnCount. This means the delimiter (ASCII Dec $DelimiterDecimal / Hex $DelimiterHex) exists in the data set, or the data set is improperly formatted. The producer of the load file will need to fix this. For now, in the new file the line will be rewritten as-is with no repairs performed."
$StreamWriter.WriteLine($OldLine)
continue
}
ForEach ($CurrentColumn in $SelectedColumn) {
$OldColumnValue = $Split[$CurrentColumn.ColumnIndex]
Switch ($RepairOption){
'Remove decimal points from columns that Clearwell requires to be integers (such as file size)' {
$NewColumnValue = $OldColumnValue -replace '\.[\d]*',''
}
'Convert a date to format MM/dd/yyyy (adding leading 0s as necessary for two-digit months and days)'{
$OldDate = $OldColumnValue -replace $TextQualifier,''
if ("" -ne $OldDate){
$NewDate = Get-Date $OldDate -Format 'MM/dd/yyyy'
}
else{
$NewDate = $OldDate
}
$NewColumnValue = "$TextQualifier$NewDate$TextQualifier"
}
'Convert a DateTime to format MM/dd/yyyy hh:mm tt' {
$OldDate = $OldColumnValue -replace $TextQualifier,''
if ("" -ne $OldDate){
try {
$NewDate = Get-Date $OldDate -Format 'MM/dd/yyyy hh:mm tt' -ErrorAction Stop
}
catch {
$TempDate = [datetime]::ParseExact($OldDate,'MM/dd/yyyy hh:mm t',[Globalization.CultureInfo]::InvariantCulture)
$NewDate = Get-Date $TempDate -Format 'MM/dd/yyyy hh:mm tt' -ErrorAction Stop
}
}
else{
$NewDate = $OldDate
}
$NewColumnValue = "$TextQualifier$NewDate$TextQualifier"
}
'Blank out a column (replace all values with blank values)'{
$NewColumnValue = "$TextQualifier$TextQualifier"
}
'Add text qualifiers'{
$NewColumnValue = "$TextQualifier$OldColumnValue$TextQualifier"
}
'Add columns with blank values (Clearwell minimum field requirements: BATESNUM/RCRDTYPE/FILENAME/DOCEXT)'{
$NewColumnValue = $OldColumnValue
}
'Delete a column' {
$Split = Remove-CollectionItemByIndex -Collection $Split -Index $CurrentColumn.ColumnIndex
Continue
}
'Combine a folder path and a filename into a single column' {
$NewColumnValue = "$($OldColumnValue.TrimEnd($TextQualifier))\$($Split[$FileNameColumn.ColumnIndex].TrimStart($TextQualifier))"
$Split = Remove-CollectionItemByIndex -Collection $Split -Index $FileNameColumn.ColumnIndex
}
default {
Write-Host "No repair option was selected. I'm not sure how that happened but no repairs have been made." -ForegroundColor Cyan
Exit
}
}
Switch ($RepairOption) {
'Delete a column' {}
default{
Write-Debug "Updating column $($CurrentColumn.ColumnIndex)"
$Split[$CurrentColumn.ColumnIndex] = $NewColumnValue
}
}
}
Switch ($RepairOption){
'Add columns with blank values (Clearwell minimum field requirements: BATESNUM/RCRDTYPE/FILENAME/DOCEXT)'{
$AddColumnParams = @{
OldLine = $OldLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = @('NA','NA','NA','NA')
}
$NewLine = Add-Column @AddColumnParams
}
'Add columns to the load file from a separate CSV file' {
$NewLine = $OldLine
ForEach ($Prop in $CsvHeaders) {
$NewColumn = $CsvContent[$CurrentLineNumber - 2]."$Prop"
if ($NewColumn -eq $null) {
$NewColumn = ''
}
$AddColumnParams = @{
OldLine = $NewLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = $NewColumn
}
$NewLine = Add-Column @AddColumnParams
}
}
'Add an ExtractedText column for a single folder of Bates-numbered text' {
$NewLine = $OldLine
ForEach ($Prop in $CsvHeaders) {
$NewColumn = $CsvContent[$CurrentLineNumber - 2]
if ($NewColumn -eq $null) {
$NewColumn = ''
}
$AddColumnParams = @{
OldLine = $NewLine
Delimiter = $Delimiter
TextQualifier = $TextQualifier
NewColumn = $NewColumn
}
$NewLine = Add-Column @AddColumnParams
}
}
default {
$NewLine = $Split -join $Delimiter
}
}
$StreamWriter.WriteLine($NewLine)
}
$StreamReader.Close()
$StreamWriter.Close()
Write-Host "Repair-LoadFile Saved $CurrentLineNumber lines to $NewFile"
Write-Progress -Activity @"
Reading $OldFile
Saving repaired file to $NewFile
"@ -Completed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment