-
-
Save phyoewaipaing/1e5abd9aad394882ab26355c1ea4397c to your computer and use it in GitHub Desktop.
Replace text characters or strings in bulk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<# | |
.SYNOPSIS | |
Script to replace multiple characters or strings in the csv or txt file | |
.DESCRIPTION | |
Script to replace multiple characters or strings in the csv or txt file by using characters or string mapping pairs as an input | |
Use cases are converting/deleting multiple non-ASCII and English Characters and strings | |
With the csv source file, the script has an option to exclude specific columns from the characters being converted. | |
.PARAMETER FilePath | |
The input file path, usually txt or csv file. | |
.PARAMETER OutputFilePath | |
The output file path. | |
.PARAMETER ColumnsToSkip | |
Column name on which the character conversion will be skipped | |
.PARAMETER InputEncoding | |
Encoding used to read the source file | |
.PARAMETER OutputEncoding | |
Encoding used to write the destination file | |
.PARAMETER WordMatchOnly
If used, a replacement is made only where the mapping key matches an entire word. Not applicable to keys made of special (non-word) characters.
.PARAMETER StringDelimiter
(optional) The delimiter that separates the mapping pairs from one another. E.g., in the line below the pipe (|) is the string delimiter:
'Ä:A|à:a|â:a|1st:First|2nd:Second'
Default value is comma (,). If any of the mapping values contains a comma, use another delimiter that does not occur anywhere in the mapping string.
.PARAMETER PairDelimiter
(optional) The delimiter that separates the two halves of each mapping pair. E.g., in the line below the colon (:) is the pair delimiter:
'Ä:A|à:a|â:a|1st:First|2nd:Second'
Default value is colon (:). If any of the mapping values contains a colon, use another delimiter that does not occur anywhere in the mapping string.
.PARAMETER CharacterMappingString
The mapping pairs that drive the character or string replacement. The parameter name can be abbreviated to -CharacterMapping, as in the examples. Note that the replacement is case-sensitive.
The entire string should be single-quoted. The pairs are comma-delimited (by default) and the two halves of each pair are colon-delimited (by default).
Eg: 'Ä:A,à:a,â:a'
If a mapping pair contains a single quote, write that single quote as two single quotes.
Eg: 'Ä:A,à:a,â:a,'':SingleQuote,":DoubleQuotes'
.EXAMPLE
.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.csv -OutputFilePath Output.csv -ColumnsToSkip "secret" -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
Replace characters in all csv columns except the "secret" column according to the mapping pairs defined in -CharacterMapping.
.EXAMPLE
.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -WordMatchOnly -InputEncoding ASCII -OutputEncoding UTF8 -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
Replace exact words in the text file according to the given mapping pairs, using ASCII encoding to read the text file and UTF8 to write the output file.
.EXAMPLE
.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -StringDelimiter '|' -PairDelimiter ';' -CharacterMapping 'Ä;A|à;a|â;a|1st;first|2nd;second'
Replace characters in the text file according to the given mapping pairs, using a pipe (|) to separate the pairs and a semicolon (;) to separate the two halves of each pair.
.NOTES
Author : Phyoe Wai Paing
Version : 1.0 : 22.Nov.2023 : Initial Release | |
: 1.1 : 30.Mar.2024 : Both txt and csv files are supported as an input file. | |
Support escape characters in the CharacterMapping parameter. | |
Mapping string changed from double-quoted to single-quoted. | |
Added delimiter for mapping pair and string pair | |
.LINK | |
https://www.scriptinghouse.com/ | |
#> | |
param (
    # Path of the input file. Files with a .csv extension are processed column by column,
    # anything else is processed line by line as plain text.
    [Parameter(Mandatory=$true)]
    [string]$FilePath,

    # Path of the file the converted content is written to.
    [Parameter(Mandatory=$true)]
    [string]$OutputFilePath,

    # Names of csv columns whose values must be left untouched.
    [string[]]$ColumnsToSkip = @(),

    # Encoding used to read the input file.
    [ValidateSet("ascii","string","unicode","bigendianunicode","utf8","utf7","utf32","default","oem")]
    [string]$InputEncoding='utf8',

    # Encoding used to write the output file.
    [ValidateSet("ascii","string","unicode","bigendianunicode","utf8","utf7","utf32","default","oem")]
    [string]$OutputEncoding='utf8',

    # When set, a key is replaced only where it matches a whole word.
    [switch]$WordMatchOnly,

    # The mapping pairs, e.g. 'Ä:A,à:a,1st:first'. The help text and examples call this
    # parameter -CharacterMapping; the alias makes that spelling an exact match instead of
    # relying on PowerShell's parameter-name abbreviation.
    [Alias('CharacterMapping')]
    [string]$CharacterMappingString,

    # Separator between mapping pairs. An empty separator is rejected because String.Split
    # would then fall back to splitting on white space.
    [ValidateNotNullOrEmpty()]
    [string]$StringDelimiter=',',

    # Separator between the two halves of one mapping pair. Must not be empty, for the same reason.
    [ValidateNotNullOrEmpty()]
    [string]$PairDelimiter = ':'
)
# Stop early when the input path does not point at an existing file.
# -PathType Leaf rejects directories, which can be neither imported as csv nor read as text.
if (-not (Test-Path -Path $FilePath -PathType Leaf))
{
    Write-Host "File not found: $FilePath"
    # Non-zero exit code so that calling scripts and schedulers can detect the failure.
    exit 1
}
# Mapping table. Keys are compared ordinally, so the mapping is case-sensitive
# ('a' and 'A' are different keys).
$caseSensitiveCharacterMapping = New-Object 'System.Collections.Generic.Dictionary[string,string]'([System.StringComparer]::Ordinal)

# Replacement rules in the order they were supplied. Each rule carries a ready-to-use regex
# pattern and replacement text, so nothing is re-escaped for every line or cell.
$replacementRules = New-Object 'System.Collections.Generic.List[object]'

# Without a mapping there is nothing to convert. Stop here rather than falling through to the
# "output file saved" message, which would be untrue because no file is written.
if ([string]::IsNullOrEmpty($CharacterMappingString))
{
    Write-Warning "No character mapping was supplied. Nothing was converted and no output file was written."
    return
}

# An empty delimiter would make String.Split fall back to splitting on white space.
if ([string]::IsNullOrEmpty($StringDelimiter) -or [string]::IsNullOrEmpty($PairDelimiter))
{
    Write-Warning "StringDelimiter and PairDelimiter must not be empty. Nothing was converted and no output file was written."
    return
}

# Split on the delimiter as one whole string. The String[] overload is chosen explicitly so that
# Windows PowerShell and PowerShell 7 treat a multi-character delimiter the same way.
$pairs = $CharacterMappingString.Split([string[]]@($StringDelimiter), [System.StringSplitOptions]::None)
foreach ($pair in $pairs)
{
    # A leading, trailing or doubled delimiter produces an empty segment; ignore it.
    if ($pair.Length -eq 0)
    {
        continue
    }

    $keyValue = $pair.Split([string[]]@($PairDelimiter), [System.StringSplitOptions]::None)
    if ($keyValue.Length -ne 2)
    {
        Write-Warning "Skipping malformed mapping pair '$pair': expected exactly one '$PairDelimiter'."
        continue
    }

    $mapKey = $keyValue[0]
    $mapValue = $keyValue[1]

    # An empty key would become an empty regex, which matches at every position of the input.
    if ($mapKey.Length -eq 0)
    {
        Write-Warning "Skipping mapping pair '$pair': the text to search for is empty."
        continue
    }

    # The first definition of a key wins; a repeated key is reported instead of raising an error.
    if ($caseSensitiveCharacterMapping.ContainsKey($mapKey))
    {
        Write-Warning "Skipping mapping pair '$pair': '$mapKey' is already mapped."
        continue
    }
    $caseSensitiveCharacterMapping.Add($mapKey, $mapValue)

    # Escape the key so that regex metacharacters in it are matched literally.
    $pattern = [regex]::Escape($mapKey)
    if ($WordMatchOnly)
    {
        $pattern = "\b$pattern\b"
    }

    # In a regex replacement string '$' starts a substitution ($1, $&, ...); doubling it keeps
    # a '$' in the mapping value literal.
    $replacementRules.Add([pscustomobject]@{
        Pattern     = $pattern
        Replacement = $mapValue.Replace('$', '$$')
    })
}

# Applies every replacement rule, in order and case-sensitively, to $inputString and returns the result.
function ReplaceCharacters($inputString)
{
    foreach ($rule in $replacementRules)
    {
        $inputString = $inputString -creplace $rule.Pattern, $rule.Replacement
    }
    return $inputString
}

# Read file & Convert
Write-Host "Converting. Please wait..." -NoNewLine
If ((Get-ChildItem -Path $FilePath).Extension -eq '.csv')
{
    # csv input: convert cell by cell so that the columns named in -ColumnsToSkip stay untouched.
    $data = Import-Csv -Path $FilePath -Encoding $InputEncoding
    foreach ($entry in $data)
    {
        foreach ($column in $entry.PSObject.Properties)
        {
            if ($ColumnsToSkip -notcontains $column.Name)
            {
                $entry.$($column.Name) = ReplaceCharacters ($entry.$($column.Name))
            }
        }
    }
    $data | Export-Csv -Path $OutputFilePath -NoTypeInformation -Encoding $OutputEncoding
}
else
{
    # Any other input is treated as plain text and converted line by line.
    $data = Get-Content -Path $FilePath -Encoding $InputEncoding
    $data = $data | ForEach-Object { ReplaceCharacters $_ }
    $data | Out-File -FilePath $OutputFilePath -Encoding $OutputEncoding
}

Write-Host "`rThe output file saved to $OutputFilePath."
Write-Host "Encoding: $InputEncoding is used to read the file and Encoding: $OutputEncoding is used to write the file."
Write-Host -fore yellow "If there are some incorrect characters in the output, try different encoding in -InputEncoding and -OutputEncoding parameter from one of the below values:`nascii,string,unicode,bigendianunicode,utf8,utf7,utf32,default,oem"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment