## phyoewaipaing/Replace_Character_Pairs_v1.1.ps1 Secret

## Replace_Character_Pairs_v1.1.ps1
<#
	.SYNOPSIS
	Script to replace multiple characters or strings in the csv or txt file

	.DESCRIPTION
	Script to replace multiple characters or strings in the csv or txt file by using characters or string mapping pairs as an input
	Use cases are converting/deleting multiple non-ASCII and English Characters and strings
	With the csv source file, the script has an option to exclude specific columns from the characters being converted.

	.PARAMETER FilePath
	The input file path, usually txt or csv file.

	.PARAMETER OutputFilePath
	The output file path.

	.PARAMETER ColumnsToSkip
	Column name on which the character conversion will be skipped

	.PARAMETER InputEncoding
	Encoding used to read the source file

	.PARAMETER OutputEncoding
	Encoding used to write the destination file

	.PARAMETER WordMatchOnly
	If used, the change will happen if the characters match only the entire word. Not applicable to special characters.

	.PARAMETER StringDelimiter
	(optional) The delimiter used to split the mapping string into pairs. E.g., in the line below, the pipe (|) separates the pairs:
	'Ä:A|à:a|â:a|1st:First|2nd:Second'
	Default value is comma (,). If any of the mapping values contains a comma, use another delimiter that does not occur in the mapping string.

	.PARAMETER PairDelimiter
	(optional) The delimiter used to split each mapping pair into its search text and its replacement. E.g., in the line below, the colon (:) is the delimiter:
	'Ä:A|à:a|â:a|1st:First|2nd:Second'
	Default value is colon (:). If any of the mapping values contains a colon, use another delimiter that does not occur in the mapping string.

	.PARAMETER CharacterMappingString
	The mapping pairs that drive the character or string replacement. Note that the replacement is case-sensitive.
	The entire string should be single quoted. Pairs are comma-delimited (by default) and the two halves of each pair are colon-delimited (by default).
	The examples below pass it as -CharacterMapping, which PowerShell accepts as an unambiguous abbreviation of the parameter name.
	Eg:	'Ä:A,à:a,â:a'
	If there is any single quote in the mapping pair, then you must write this single quote as two single quotes.
	Eg: 'Ä:A,à:a,â:a,'':SingleQuote,":DoubleQuotes'

	.EXAMPLE
	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.csv -OutputFilePath Output.csv -ColumnsToSkip "secret" -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
	Replace characters in all csv columns except "secret" column according to the mapping pair defined in -CharacterMapping

	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -WordMatchOnly -InputEncoding ASCII -OutputEncoding UTF8 -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
	Replace exact words in the text file according to the given mapping pair using ASCII encoding to read the text file and UTF8 to write the output file.

	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -StringDelimiter '|' -PairDelimiter ';' -CharacterMapping 'Ä;A|à;a|â;a|1st;first|2nd;second'
	Replace characters in the text file according to the given mapping pairs, using the pipe (|) to separate the pairs and the semicolon (;) to separate the two halves of each pair.

	Author  : Phyoe Wai Paing
	Version : 1.0 : 22.Nov.2023 : Initial Release
			: 1.1 : 30.Mar.2024 : Both txt and csv files are supported as an input file.
								  Support escape characters in the CharacterMapping parameter.
								  Mapping string changed from double-quoted to single-quoted.
								  Added delimiter for mapping pair and string pair

.LINK
	https://www.scriptinghouse.com/
#>

param (
	# Path of the input file; a .csv extension selects CSV processing, anything else is read as text.
	[Parameter(Mandatory = $true)]
	[string] $FilePath,

	# Path the converted content is written to.
	[Parameter(Mandatory = $true)]
	[string] $OutputFilePath,

	# CSV column names whose values are left unconverted.
	[string[]] $ColumnsToSkip = @(),

	# Encoding name handed to the cmdlet that reads the input file.
	[ValidateSet('ascii', 'string', 'unicode', 'bigendianunicode', 'utf8', 'utf7', 'utf32', 'default', 'oem')]
	[string] $InputEncoding = 'utf8',

	# Encoding name handed to the cmdlet that writes the output file.
	[ValidateSet('ascii', 'string', 'unicode', 'bigendianunicode', 'utf8', 'utf7', 'utf32', 'default', 'oem')]
	[string] $OutputEncoding = 'utf8',

	# When set, a key is only replaced where it sits between regex word boundaries (\b).
	[switch] $WordMatchOnly,

	# The mapping pairs, e.g. 'Ä:A,à:a'.
	[string] $CharacterMappingString,

	# Separates one mapping pair from the next inside the mapping string.
	[string] $StringDelimiter = ',',

	# Separates the search text from its replacement inside one pair.
	[string] $PairDelimiter = ':'
)

# Check that the input path exists and is a file.
# -PathType Leaf also rejects directories, which cannot be read as a text or CSV file.
if (-not (Test-Path -Path $FilePath -PathType Leaf))
	{
		# Report on the error stream and leave with a non-zero exit code so that
		# calling scripts and schedulers can detect the failure (a bare 'exit' reports success).
		Write-Error -Message "File not found: $FilePath"
		exit 1
	}

# Mapping of search text -> replacement text. The Ordinal comparer makes the keys case-sensitive.
$caseSensitiveCharacterMapping = New-Object 'System.Collections.Generic.Dictionary[string,string]'([System.StringComparer]::Ordinal)

# Function to replace characters according to the provided mapping.
# Applies every rule in $replacementRules (built further down, before the first call)
# to $inputString, one rule after another, and returns the converted string.
# Each rule carries a ready-to-use regex pattern and replacement, so nothing has to be
# escaped again for every string that is processed.
function ReplaceCharacters($inputString)
	{
		foreach ($rule in $replacementRules)
			{
				$inputString = $inputString -creplace $rule.Pattern, $rule.Replacement
			}
		return $inputString
	}

if ($CharacterMappingString -eq "")
	{
		# Without a mapping nothing is read or written, so do not claim that an output file was saved.
		Write-Warning "No character mapping was supplied. Nothing was converted and no output file was written."
	}
else
	{
		# Split the mapping string into pairs and each pair into key and value.
		foreach ($pair in $CharacterMappingString.Split($StringDelimiter))
			{
				$keyValue = $pair.Split($PairDelimiter)
				if ($keyValue.Length -ne 2 -or $keyValue[0] -eq '')
					{
						# An empty key would become an empty regex that matches at every position,
						# and a pair with zero or several delimiters is ambiguous. Skip both, but
						# say so (an empty pair, e.g. from a trailing delimiter, is skipped quietly).
						if ($pair -ne '')
							{
								Write-Warning "Ignoring mapping pair '$pair': expected exactly one '$PairDelimiter' with non-empty text before it."
							}
						continue
					}
				# Indexer assignment instead of Add(): a repeated key overwrites the earlier value instead of throwing.
				$caseSensitiveCharacterMapping[$keyValue[0]] = $keyValue[1]
			}

		# Build the regex rules once, up front.
		$replacementRules = @(
			foreach ($key in $caseSensitiveCharacterMapping.Keys)
				{
					$pattern = [regex]::Escape($key)	## If the key contains special characters, we need to escape it
					if ($WordMatchOnly)
						{
							$pattern = "\b$pattern\b"
						}
					New-Object PSObject -Property @{
						Pattern     = $pattern
						# '$' starts a substitution token ($1, $&, ...) in a regex replacement string;
						# doubling it keeps the replacement text literal.
						Replacement = $caseSensitiveCharacterMapping[$key].Replace('$', '$$')
					}
				}
		)

		# Read file & Convert
		Write-Host "Converting. Please wait..." -NoNewLine
		# Take the extension from the path text itself; this needs no file-system lookup,
		# so hidden files are classified like any other file.
		if ([System.IO.Path]::GetExtension($FilePath) -eq '.csv')
			{
				$data = Import-Csv -Path $FilePath -Encoding $InputEncoding
				# Convert every column value except those listed in -ColumnsToSkip
				foreach ($entry in $data)
					{
						foreach ($column in $entry.PSObject.Properties)
							{
								if ($ColumnsToSkip -notcontains $column.Name)
									{
										$column.Value = ReplaceCharacters $column.Value
									}
							}
					}
				$data | Export-Csv -Path $OutputFilePath -NoTypeInformation -Encoding $OutputEncoding
			}
		else
			{
				# Any other extension is handled as plain text, line by line
				$data = Get-Content -Path $FilePath -Encoding $InputEncoding
				$data = $data | ForEach-Object { ReplaceCharacters $_ }
				$data | Out-File -FilePath $OutputFilePath -Encoding $OutputEncoding
			}

		# The leading carriage return overwrites the "Converting..." progress text on the same line.
		Write-Host "`rThe output file saved to $OutputFilePath."
		Write-Host "Encoding: $InputEncoding is used to read the file and Encoding: $OutputEncoding is used to write the file."
		Write-Host  -fore yellow  "If there are some incorrect characters in the output, try different encoding in -InputEncoding and -OutputEncoding parameter from one of the below values:`nascii,string,unicode,bigendianunicode,utf8,utf7,utf32,default,oem"
	}
	<#
	.SYNOPSIS
	Script to replace multiple characters or strings in the csv or txt file

	.DESCRIPTION
	Script to replace multiple characters or strings in the csv or txt file by using characters or string mapping pairs as an input
	Use cases are converting/deleting multiple non-ASCII and English Characters and strings
	With the csv source file, the script has an option to exclude specific columns from the characters being converted.

	.PARAMETER FilePath
	The input file path, usually txt or csv file.

	.PARAMETER OutputFilePath
	The output file path.

	.PARAMETER ColumnsToSkip
	Column name on which the character conversion will be skipped

	.PARAMETER InputEncoding
	Encoding used to read the source file

	.PARAMETER OutputEncoding
	Encoding used to write the destination file

	.PARAMETER WordMatchOnly
	If used, the change will happen if the characters match only the entire word. Not applicable to special characters.

	.PARAMETER StringDelimiter
	(optional) The delimiter used to split the mapping string into pairs. E.g., in the line below, the pipe (|) separates the pairs:
	'Ä:A|à:a|â:a|1st:First|2nd:Second'
	Default value is comma (,). If any of the mapping values contains a comma, use another delimiter that does not occur in the mapping string.

	.PARAMETER PairDelimiter
	(optional) The delimiter used to split each mapping pair into its search text and its replacement. E.g., in the line below, the colon (:) is the delimiter:
	'Ä:A|à:a|â:a|1st:First|2nd:Second'
	Default value is colon (:). If any of the mapping values contains a colon, use another delimiter that does not occur in the mapping string.

	.PARAMETER CharacterMappingString
	The mapping pairs that drive the character or string replacement. Note that the replacement is case-sensitive.
	The entire string should be single quoted. Pairs are comma-delimited (by default) and the two halves of each pair are colon-delimited (by default).
	The examples below pass it as -CharacterMapping, which PowerShell accepts as an unambiguous abbreviation of the parameter name.
	Eg: 'Ä:A,à:a,â:a'
	If there is any single quote in the mapping pair, then you must write this single quote as two single quotes.
	Eg: 'Ä:A,à:a,â:a,'':SingleQuote,":DoubleQuotes'

	.EXAMPLE
	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.csv -OutputFilePath Output.csv -ColumnsToSkip "secret" -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
	Replace characters in all csv columns except "secret" column according to the mapping pair defined in -CharacterMapping

	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -WordMatchOnly -InputEncoding ASCII -OutputEncoding UTF8 -CharacterMapping 'Ä:A,à:a,â:a,1st:first,2nd:second'
	Replace exact words in the text file according to the given mapping pair using ASCII encoding to read the text file and UTF8 to write the output file.

	.\Replace_Character_Pairs_v1.1.ps1 -FilePath Input.txt -OutputFilePath Output.txt -StringDelimiter '|' -PairDelimiter ';' -CharacterMapping 'Ä;A|à;a|â;a|1st;first|2nd;second'
	Replace characters in the text file according to the given mapping pairs, using the pipe (|) to separate the pairs and the semicolon (;) to separate the two halves of each pair.

	Author : Phyoe Wai Paing
	Version : 1.0 : 22.Nov.2023 : Initial Release
	: 1.1 : 30.Mar.2024 : Both txt and csv files are supported as an input file.
	Support escape characters in the CharacterMapping parameter.
	Mapping string changed from double-quoted to single-quoted.
	Added delimiter for mapping pair and string pair

	.LINK
	https://www.scriptinghouse.com/
	#>

	param (
		# Path of the input file; a .csv extension selects CSV processing, anything else is read as text.
		[Parameter(Mandatory = $true)]
		[string] $FilePath,

		# Path the converted content is written to.
		[Parameter(Mandatory = $true)]
		[string] $OutputFilePath,

		# CSV column names whose values are left unconverted.
		[string[]] $ColumnsToSkip = @(),

		# Encoding name handed to the cmdlet that reads the input file.
		[ValidateSet('ascii', 'string', 'unicode', 'bigendianunicode', 'utf8', 'utf7', 'utf32', 'default', 'oem')]
		[string] $InputEncoding = 'utf8',

		# Encoding name handed to the cmdlet that writes the output file.
		[ValidateSet('ascii', 'string', 'unicode', 'bigendianunicode', 'utf8', 'utf7', 'utf32', 'default', 'oem')]
		[string] $OutputEncoding = 'utf8',

		# When set, a key is only replaced where it sits between regex word boundaries (\b).
		[switch] $WordMatchOnly,

		# The mapping pairs, e.g. 'Ä:A,à:a'.
		[string] $CharacterMappingString,

		# Separates one mapping pair from the next inside the mapping string.
		[string] $StringDelimiter = ',',

		# Separates the search text from its replacement inside one pair.
		[string] $PairDelimiter = ':'
	)

	# Check that the input path exists and is a file.
	# -PathType Leaf also rejects directories, which cannot be read as a text or CSV file.
	if (-not (Test-Path -Path $FilePath -PathType Leaf))
		{
			# Report on the error stream and leave with a non-zero exit code so that
			# calling scripts and schedulers can detect the failure (a bare 'exit' reports success).
			Write-Error -Message "File not found: $FilePath"
			exit 1
		}

	# Mapping of search text -> replacement text. The Ordinal comparer makes the keys case-sensitive.
	$caseSensitiveCharacterMapping = New-Object 'System.Collections.Generic.Dictionary[string,string]'([System.StringComparer]::Ordinal)

	# Function to replace characters according to the provided mapping.
	# Applies every rule in $replacementRules (built further down, before the first call)
	# to $inputString, one rule after another, and returns the converted string.
	# Each rule carries a ready-to-use regex pattern and replacement, so nothing has to be
	# escaped again for every string that is processed.
	function ReplaceCharacters($inputString)
		{
			foreach ($rule in $replacementRules)
				{
					$inputString = $inputString -creplace $rule.Pattern, $rule.Replacement
				}
			return $inputString
		}

	if ($CharacterMappingString -eq "")
		{
			# Without a mapping nothing is read or written, so do not claim that an output file was saved.
			Write-Warning "No character mapping was supplied. Nothing was converted and no output file was written."
		}
	else
		{
			# Split the mapping string into pairs and each pair into key and value.
			foreach ($pair in $CharacterMappingString.Split($StringDelimiter))
				{
					$keyValue = $pair.Split($PairDelimiter)
					if ($keyValue.Length -ne 2 -or $keyValue[0] -eq '')
						{
							# An empty key would become an empty regex that matches at every position,
							# and a pair with zero or several delimiters is ambiguous. Skip both, but
							# say so (an empty pair, e.g. from a trailing delimiter, is skipped quietly).
							if ($pair -ne '')
								{
									Write-Warning "Ignoring mapping pair '$pair': expected exactly one '$PairDelimiter' with non-empty text before it."
								}
							continue
						}
					# Indexer assignment instead of Add(): a repeated key overwrites the earlier value instead of throwing.
					$caseSensitiveCharacterMapping[$keyValue[0]] = $keyValue[1]
				}

			# Build the regex rules once, up front.
			$replacementRules = @(
				foreach ($key in $caseSensitiveCharacterMapping.Keys)
					{
						$pattern = [regex]::Escape($key)	## If the key contains special characters, we need to escape it
						if ($WordMatchOnly)
							{
								$pattern = "\b$pattern\b"
							}
						New-Object PSObject -Property @{
							Pattern     = $pattern
							# '$' starts a substitution token ($1, $&, ...) in a regex replacement string;
							# doubling it keeps the replacement text literal.
							Replacement = $caseSensitiveCharacterMapping[$key].Replace('$', '$$')
						}
					}
			)

			# Read file & Convert
			Write-Host "Converting. Please wait..." -NoNewLine
			# Take the extension from the path text itself; this needs no file-system lookup,
			# so hidden files are classified like any other file.
			if ([System.IO.Path]::GetExtension($FilePath) -eq '.csv')
				{
					$data = Import-Csv -Path $FilePath -Encoding $InputEncoding
					# Convert every column value except those listed in -ColumnsToSkip
					foreach ($entry in $data)
						{
							foreach ($column in $entry.PSObject.Properties)
								{
									if ($ColumnsToSkip -notcontains $column.Name)
										{
											$column.Value = ReplaceCharacters $column.Value
										}
								}
						}
					# A real pipeline operator is required here (the escaped form "\|" is not valid PowerShell).
					$data | Export-Csv -Path $OutputFilePath -NoTypeInformation -Encoding $OutputEncoding
				}
			else
				{
					# Any other extension is handled as plain text, line by line
					$data = Get-Content -Path $FilePath -Encoding $InputEncoding
					$data = $data | ForEach-Object { ReplaceCharacters $_ }
					$data | Out-File -FilePath $OutputFilePath -Encoding $OutputEncoding
				}

			# The leading carriage return overwrites the "Converting..." progress text on the same line.
			Write-Host "`rThe output file saved to $OutputFilePath."
			Write-Host "Encoding: $InputEncoding is used to read the file and Encoding: $OutputEncoding is used to write the file."
			Write-Host -fore yellow "If there are some incorrect characters in the output, try different encoding in -InputEncoding and -OutputEncoding parameter from one of the below values:`nascii,string,unicode,bigendianunicode,utf8,utf7,utf32,default,oem"
		}