# jpoehls/encoding-helpers.ps1

## encoding-helpers.ps1
<#
.SYNOPSIS
Converts files to the given encoding.
Matches the include pattern recursively under the given path.

.DESCRIPTION
Finds files under -Path whose names match -Include, asks Get-FileEncoding for
each file's BOM-detected encoding, and rewrites every file whose detected
encoding differs from -Encoding by piping its content through Out-File.
A summary line is written to the host when done.

.PARAMETER Include
Wildcard pattern of the file names to convert (e.g. *.js).

.PARAMETER Path
Directory that is searched recursively.

.PARAMETER Encoding
Target encoding name; compared against the value returned by Get-FileEncoding
and passed to Out-File -Encoding. Defaults to UTF8.

.EXAMPLE
Convert-FileEncoding -Include *.js -Path scripts -Encoding UTF8
#>
function Convert-FileEncoding([string]$Include, [string]$Path, [string]$Encoding='UTF8') {
  $count = 0

  # Notes on the pipeline below:
  # - The filter is the caller-supplied $Include.
  # - Directories whose names happen to match the pattern are skipped, because
  #   they have no content to detect or convert.
  # - (Get-Content ...) is parenthesized so the file is read completely before
  #   Out-File reopens the same path for writing.
  Get-ChildItem -Include $Include -Recurse -Path $Path `
  | where { -not $_.PSIsContainer } `
  | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} `
  | where {$_.Encoding -ne $Encoding} `
  | % { (Get-Content $_.FullName) `
        | Out-File $_.FullName -Encoding $Encoding; $count++; }

  Write-Host "$count $Include file(s) converted to $Encoding in $Path."
}

# http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
<#
.SYNOPSIS
Gets file encoding.

.DESCRIPTION
The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx

Only the first four bytes of the file are inspected. A file that carries no
recognized BOM (including an empty file) is reported as 'ASCII'.

.PARAMETER Path
Path of the file to inspect. Can be bound from a pipeline object's Path property.

.OUTPUTS
System.String. One of: 'UTF8', 'UTF32 Big-Endian', 'UTF32 Little-Endian',
'Unicode UTF-16 Big-Endian', 'Unicode UTF-16 Little-Endian', 'UTF7', 'UTF-1',
'UTF-EBCDIC', 'SCSU', 'BOCU-1', 'GB-18030', 'ASCII'.

.EXAMPLE
Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
This command gets ps1 files in current directory where encoding is not ASCII

.EXAMPLE
Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
Same as previous example but fixes encoding using set-content

.NOTES
Modified by F.RICHARD August 2010
add comment + more BOM
http://unicode.org/faq/utf_bom.html
http://en.wikipedia.org/wiki/Byte_order_mark

Do this next line before or add function in Profile.ps1
Import-Module .\Get-FileEncoding.ps1
#>
function Get-FileEncoding
{
  [CmdletBinding()]
  Param (
    [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
    [string]$Path
  )

  # A process block is required so that every pipeline input object is
  # examined, not just the last one.
  process
  {
    # Read at most the first four bytes. PowerShell 6+ replaced
    # '-Encoding Byte' with the '-AsByteStream' switch.
    $readArgs = @{ Path = $Path; TotalCount = 4 }
    if ($PSVersionTable.PSVersion.Major -ge 6) { $readArgs['AsByteStream'] = $True }
    else { $readArgs['Encoding'] = 'Byte' }

    # @() guarantees an array even for empty or one-byte files.
    [byte[]]$byte = @(Get-Content @readArgs)

    # Render the header as e.g. 'EF-BB-BF-41' ('' for an empty file) so every
    # signature can be matched as a prefix without indexing past the end of a
    # short file.
    $bom = [System.BitConverter]::ToString($byte)
    #Write-Host Bytes: $bom

    # EF BB BF (UTF8)
    if ($bom -match '^EF-BB-BF')
    { Write-Output 'UTF8' }

    # The UTF-32 signatures must be tested before UTF-16: the UTF-32 LE BOM
    # (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE).

    # 00 00 FE FF (UTF32 Big-Endian)
    elseif ($bom -match '^00-00-FE-FF')
    { Write-Output 'UTF32 Big-Endian' }

    # FF FE 00 00 (UTF32 Little-Endian)
    elseif ($bom -match '^FF-FE-00-00')
    { Write-Output 'UTF32 Little-Endian' }

    # FE FF  (UTF-16 Big-Endian)
    elseif ($bom -match '^FE-FF')
    { Write-Output 'Unicode UTF-16 Big-Endian' }

    # FF FE  (UTF-16 Little-Endian)
    elseif ($bom -match '^FF-FE')
    { Write-Output 'Unicode UTF-16 Little-Endian' }

    # 2B 2F 76 (38 | 39 | 2B | 2F)
    elseif ($bom -match '^2B-2F-76-(38|39|2B|2F)')
    { Write-Output 'UTF7'}

    # F7 64 4C (UTF-1)
    elseif ($bom -match '^F7-64-4C')
    { Write-Output 'UTF-1' }

    # DD 73 66 73 (UTF-EBCDIC)
    elseif ($bom -match '^DD-73-66-73')
    { Write-Output 'UTF-EBCDIC' }

    # 0E FE FF (SCSU)
    elseif ($bom -match '^0E-FE-FF')
    { Write-Output 'SCSU' }

    # FB EE 28  (BOCU-1)
    elseif ($bom -match '^FB-EE-28')
    { Write-Output 'BOCU-1' }

    # 84 31 95 33 (GB-18030)
    elseif ($bom -match '^84-31-95-33')
    { Write-Output 'GB-18030' }

    # No recognized BOM (or file too short to hold one).
    else
    { Write-Output 'ASCII' }
  }
}
	<#
	.SYNOPSIS
	Converts files to the given encoding.
	Matches the include pattern recursively under the given path.

	.DESCRIPTION
	Finds files under -Path whose names match -Include, asks Get-FileEncoding for
	each file's BOM-detected encoding, and rewrites every file whose detected
	encoding differs from -Encoding by piping its content through Out-File.
	A summary line is written to the host when done.

	.PARAMETER Include
	Wildcard pattern of the file names to convert (e.g. *.js).

	.PARAMETER Path
	Directory that is searched recursively.

	.PARAMETER Encoding
	Target encoding name; compared against the value returned by Get-FileEncoding
	and passed to Out-File -Encoding. Defaults to UTF8.

	.EXAMPLE
	Convert-FileEncoding -Include *.js -Path scripts -Encoding UTF8
	#>
	function Convert-FileEncoding([string]$Include, [string]$Path, [string]$Encoding='UTF8') {
	  $count = 0

	  # Notes on the pipeline below:
	  # - The filter is the caller-supplied $Include.
	  # - Directories whose names happen to match the pattern are skipped, because
	  #   they have no content to detect or convert.
	  # - (Get-Content ...) is parenthesized so the file is read completely before
	  #   Out-File reopens the same path for writing.
	  Get-ChildItem -Include $Include -Recurse -Path $Path `
	  | where { -not $_.PSIsContainer } `
	  | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} `
	  | where {$_.Encoding -ne $Encoding} `
	  | % { (Get-Content $_.FullName) `
	        | Out-File $_.FullName -Encoding $Encoding; $count++; }

	  Write-Host "$count $Include file(s) converted to $Encoding in $Path."
	}

	# http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
	<#
	.SYNOPSIS
	Gets file encoding.

	.DESCRIPTION
	The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
	Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx

	Only the first four bytes of the file are inspected. A file that carries no
	recognized BOM (including an empty file) is reported as 'ASCII'.

	.PARAMETER Path
	Path of the file to inspect. Can be bound from a pipeline object's Path property.

	.OUTPUTS
	System.String. One of: 'UTF8', 'UTF32 Big-Endian', 'UTF32 Little-Endian',
	'Unicode UTF-16 Big-Endian', 'Unicode UTF-16 Little-Endian', 'UTF7', 'UTF-1',
	'UTF-EBCDIC', 'SCSU', 'BOCU-1', 'GB-18030', 'ASCII'.

	.EXAMPLE
	Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
	This command gets ps1 files in current directory where encoding is not ASCII

	.EXAMPLE
	Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
	Same as previous example but fixes encoding using set-content

	.NOTES
	Modified by F.RICHARD August 2010
	add comment + more BOM
	http://unicode.org/faq/utf_bom.html
	http://en.wikipedia.org/wiki/Byte_order_mark

	Do this next line before or add function in Profile.ps1
	Import-Module .\Get-FileEncoding.ps1
	#>
	function Get-FileEncoding
	{
	  [CmdletBinding()]
	  Param (
	    [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
	    [string]$Path
	  )

	  # A process block is required so that every pipeline input object is
	  # examined, not just the last one.
	  process
	  {
	    # Read at most the first four bytes. PowerShell 6+ replaced
	    # '-Encoding Byte' with the '-AsByteStream' switch.
	    $readArgs = @{ Path = $Path; TotalCount = 4 }
	    if ($PSVersionTable.PSVersion.Major -ge 6) { $readArgs['AsByteStream'] = $True }
	    else { $readArgs['Encoding'] = 'Byte' }

	    # @() guarantees an array even for empty or one-byte files.
	    [byte[]]$byte = @(Get-Content @readArgs)

	    # Render the header as e.g. 'EF-BB-BF-41' ('' for an empty file) so every
	    # signature can be matched as a prefix without indexing past the end of a
	    # short file.
	    $bom = [System.BitConverter]::ToString($byte)
	    #Write-Host Bytes: $bom

	    # EF BB BF (UTF8)
	    if ($bom -match '^EF-BB-BF')
	    { Write-Output 'UTF8' }

	    # The UTF-32 signatures must be tested before UTF-16: the UTF-32 LE BOM
	    # (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE).

	    # 00 00 FE FF (UTF32 Big-Endian)
	    elseif ($bom -match '^00-00-FE-FF')
	    { Write-Output 'UTF32 Big-Endian' }

	    # FF FE 00 00 (UTF32 Little-Endian)
	    elseif ($bom -match '^FF-FE-00-00')
	    { Write-Output 'UTF32 Little-Endian' }

	    # FE FF (UTF-16 Big-Endian)
	    elseif ($bom -match '^FE-FF')
	    { Write-Output 'Unicode UTF-16 Big-Endian' }

	    # FF FE (UTF-16 Little-Endian)
	    elseif ($bom -match '^FF-FE')
	    { Write-Output 'Unicode UTF-16 Little-Endian' }

	    # 2B 2F 76 (38 | 39 | 2B | 2F)
	    elseif ($bom -match '^2B-2F-76-(38|39|2B|2F)')
	    { Write-Output 'UTF7'}

	    # F7 64 4C (UTF-1)
	    elseif ($bom -match '^F7-64-4C')
	    { Write-Output 'UTF-1' }

	    # DD 73 66 73 (UTF-EBCDIC)
	    elseif ($bom -match '^DD-73-66-73')
	    { Write-Output 'UTF-EBCDIC' }

	    # 0E FE FF (SCSU)
	    elseif ($bom -match '^0E-FE-FF')
	    { Write-Output 'SCSU' }

	    # FB EE 28 (BOCU-1)
	    elseif ($bom -match '^FB-EE-28')
	    { Write-Output 'BOCU-1' }

	    # 84 31 95 33 (GB-18030)
	    elseif ($bom -match '^84-31-95-33')
	    { Write-Output 'GB-18030' }

	    # No recognized BOM (or file too short to hold one).
	    else
	    { Write-Output 'ASCII' }
	  }
	}