Created
January 16, 2018 21:30
-
-
Save sdwheeler/777b9ec25e914bdeb0774471a149951c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Get-FileEncoding { | |
## Get-FileEncoding http://poshcode.org/2153 | |
<# | |
.SYNOPSIS | |
Gets the encoding of a file | |
.EXAMPLE | |
Get-FileEncoding.ps1 .\UnicodeScript.ps1 | |
BodyName : unicodeFFFE | |
EncodingName : Unicode (Big-Endian) | |
HeaderName : unicodeFFFE | |
WebName : unicodeFFFE | |
WindowsCodePage : 1200 | |
IsBrowserDisplay : False | |
IsBrowserSave : False | |
IsMailNewsDisplay : False | |
IsMailNewsSave : False | |
IsSingleByte : False | |
EncoderFallback : System.Text.EncoderReplacementFallback | |
DecoderFallback : System.Text.DecoderReplacementFallback | |
IsReadOnly : True | |
CodePage : 1201 | |
#> | |
param( | |
## The path of the file to get the encoding of. | |
$Path | |
) | |
Set-StrictMode -Version Latest | |
## The hashtable used to store our mapping of encoding bytes to their | |
## name. For example, "255-254 = Unicode" | |
$encodings = @{} | |
## Find all of the encodings understood by the .NET Framework. For each, | |
## determine the bytes at the start of the file (the preamble) that the .NET | |
## Framework uses to identify that encoding. | |
$encodingMembers = [System.Text.Encoding] | | |
Get-Member -Static -MemberType Property | |
$encodingMembers | Foreach-Object { | |
$encodingBytes = [System.Text.Encoding]::($_.Name).GetPreamble() -join '-' | |
$encodings[$encodingBytes] = $_.Name | |
} | |
## Find out the lengths of all of the preambles. | |
$encodingLengths = $encodings.Keys | Where-Object { $_ } | | |
Foreach-Object { ($_ -split '-').Count } | |
## Assume the encoding is UTF7 by default | |
$result = 'UTF7' | |
## Go through each of the possible preamble lengths, read that many | |
## bytes from the file, and then see if it matches one of the encodings | |
## we know about. | |
foreach($encodingLength in $encodingLengths | Sort-Object -Descending) | |
{ | |
$bytes = (Get-Content -encoding byte -readcount $encodingLength $path)[0] | |
$encoding = $encodings[$bytes -join '-'] | |
## If we found an encoding that had the same preamble bytes, | |
## save that output and break. | |
if($encoding) | |
{ | |
$result = $encoding | |
break | |
} | |
} | |
## Finally, output the encoding. | |
[System.Text.Encoding]::$result | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment