Skip to content

Instantly share code, notes, and snippets.

@LarryWeiss
Created May 7, 2019 15:13
Show Gist options
  • Save LarryWeiss/296061f477615dc1160e4e45f339fd8a to your computer and use it in GitHub Desktop.
Save LarryWeiss/296061f477615dc1160e4e45f339fd8a to your computer and use it in GitHub Desktop.
Show-FileEncoding function written in PowerShell
########################################################################################################
#
# .SYNOPSIS
#
# PowerShell function that returns the file encoding name as a string.
#
# _____ _ ______ _ _ ______ _ _
# / ____| | | ____(_) | | ____| | (_)
# | (___ | |__ _____ ______| |__ _| | ___| |__ _ __ ___ ___ __| |_ _ __ __ _
# \___ \| '_ \ / _ \ \ /\ / /____| __| | | |/ _ \ __| | '_ \ / __/ _ \ / _` | | '_ \ / _` |
# ____) | | | | (_) \ V V / | | | | | __/ |____| | | | (_| (_) | (_| | | | | | (_| |
# |_____/|_| |_|\___/ \_/\_/ |_| |_|_|\___|______|_| |_|\___\___/ \__,_|_|_| |_|\__, |
# __/ |
# |___/
# .DESCRIPTION
#
# The Show-FileEncoding function determines encoding by looking at Byte Order Mark (BOM), or
# if no BOM then by reading the file to determine if it is ASCII or UTF-8
#
# .PARAMETER Path
#
# Path to the file to be examined
#
# .EXAMPLE
#
# Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Show-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
# This command gets ps1 files in current directory where encoding is not ASCII
#
# .EXAMPLE
#
# Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Show-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
# Same as previous example but fixes encoding using set-content
#
# .INPUTS
#
# Any object with a Path property. The Path parameter is bound from the pipeline by property name.
#
# .OUTPUTS
#
# String
#
# .NOTES
#
# Derived by Larry Weiss (lfw@airmail.net) from
# a version by Franck Richard at
# http://franckrichard.blogspot.de/2010/08/powershell-get-encoding-file-type.html
# that was based on a port of C# code by Rick Strahl from
# http://www.west-wind.com/Weblog/posts/197245.aspx
#
# .LINK
#
# References:
# http://unicode.org/faq/utf_bom.html
# http://en.wikipedia.org/wiki/Byte_order_mark
#
########################################################################################################
function Show-FileEncoding {
    [CmdletBinding()]
    param (
        [string]
        [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
        $Path
    )
    process {
        # Purpose: emit one string naming the encoding of the file at $Path.
        #   * A recognised byte order mark (BOM) decides the answer.
        #   * Without a BOM the whole file is scanned: 'ASCII' when every byte is
        #     <= 0x7F, otherwise 'UTF8!<n>' where <n> is the decimal value of the
        #     first byte above 0x7F.
        #   * An empty file is reported as 'ASCII'.
        # A path that cannot be resolved raises a terminating error rather than
        # being reported as 'ASCII'.
        # The body lives in a process block so that every object piped in by
        # property name is examined, not only the last one.

        # Resolve through PowerShell so relative paths follow the current
        # PowerShell location (the .NET working directory can differ).
        $resolved = @(Resolve-Path -Path $Path -ErrorAction Stop)[0].ProviderPath

        # Read at most the first four bytes with a plain .NET stream. This avoids
        # 'Get-Content -Encoding Byte', which is unavailable in PowerShell 6+.
        $header = New-Object byte[] 4
        $headerLength = 0
        $stream = New-Object IO.FileStream -ArgumentList $resolved, ([IO.FileMode]::Open), ([IO.FileAccess]::Read), ([IO.FileShare]::ReadWrite)
        try {
            while ($headerLength -lt $header.Length) {
                $n = $stream.Read($header, $headerLength, $header.Length - $headerLength)
                if ($n -le 0) { break }
                $headerLength += $n
            }
        }
        finally {
            $stream.Dispose()
        }

        if ($headerLength -eq 0) {
            'ASCII'
            return
        }

        # BOM signatures, tested in order. The UTF-32 LE mark (FF FE 00 00) starts
        # with the UTF-16 LE mark (FF FE), so the 4-byte form must come first.
        $signatures = @(
            @{ Name = 'UTF8';                         Bytes = @(0xef, 0xbb, 0xbf) },
            @{ Name = 'UTF32 Little-Endian';          Bytes = @(0xff, 0xfe, 0x00, 0x00) },
            @{ Name = 'UTF32 Big-Endian';             Bytes = @(0x00, 0x00, 0xfe, 0xff) },
            @{ Name = 'Unicode UTF-16 Big-Endian';    Bytes = @(0xfe, 0xff) },
            @{ Name = 'Unicode UTF-16 Little-Endian'; Bytes = @(0xff, 0xfe) },
            @{ Name = 'UTF7';                         Bytes = @(0x2b, 0x2f, 0x76, 0x38) },
            @{ Name = 'UTF7';                         Bytes = @(0x2b, 0x2f, 0x76, 0x39) },
            @{ Name = 'UTF7';                         Bytes = @(0x2b, 0x2f, 0x76, 0x2b) },
            @{ Name = 'UTF7';                         Bytes = @(0x2b, 0x2f, 0x76, 0x2f) },
            @{ Name = 'UTF-1';                        Bytes = @(0xf7, 0x64, 0x4c) },
            @{ Name = 'UTF-EBCDIC';                   Bytes = @(0xdd, 0x73, 0x66, 0x73) },
            @{ Name = 'SCSU';                         Bytes = @(0x0e, 0xfe, 0xff) },
            @{ Name = 'BOCU-1';                       Bytes = @(0xfb, 0xee, 0x28) },
            @{ Name = 'GB-18030';                     Bytes = @(0x84, 0x31, 0x95, 0x33) }
        )

        foreach ($signature in $signatures) {
            $pattern = $signature.Bytes
            # A signature longer than the bytes actually read cannot match; this
            # also keeps the zero padding of $header from producing a false hit.
            if ($headerLength -lt $pattern.Count) { continue }

            $matched = $true
            for ($i = 0; $i -lt $pattern.Count; $i++) {
                if ($header[$i] -ne $pattern[$i]) {
                    $matched = $false
                    break
                }
            }
            if ($matched) {
                $signature.Name
                return
            }
        }

        # No BOM: look for the first byte outside the 7-bit ASCII range.
        foreach ($byte in [IO.File]::ReadAllBytes($resolved)) {
            if ($byte -gt 0x7f) {
                # Output format kept from the original: 'UTF8!' + decimal byte value.
                'UTF8!' + $byte
                return
            }
        }
        'ASCII'
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment