Skip to content

Instantly share code, notes, and snippets.

@klinkby
Created May 10, 2022 10:15
Show Gist options
  • Save klinkby/26de88bf36e84bb832edeca9aa361509 to your computer and use it in GitHub Desktop.
Save klinkby/26de88bf36e84bb832edeca9aa361509 to your computer and use it in GitHub Desktop.
/// <summary>
/// Samples up to <paramref name="bufferSize"/> bytes from <paramref name="s"/> and guesses
/// the text encoding from that sample.
/// </summary>
/// <param name="s">Stream to inspect; detection is only attempted when it is both readable and seekable.</param>
/// <param name="bufferSize">Maximum number of bytes to sample.</param>
/// <param name="ct">Token that cancels the read.</param>
/// <returns>
/// The encoding chosen by <c>GetEncodingFromBuffer</c>, or <see cref="Encoding.Default"/> when the
/// stream cannot be read and rewound, or when it yields no bytes.
/// </returns>
/// <remarks>
/// After a non-empty sample has been taken the stream is rewound to its beginning
/// (not to whatever position it had on entry).
/// </remarks>
static async Task<Encoding> GetEncodingFromStream(Stream s, int bufferSize, CancellationToken ct)
{
    if (!s.CanSeek || !s.CanRead)
    {
        // can't touch this
        return Encoding.Default;
    }

    byte[] buffer = new byte[bufferSize];

    // A single ReadAsync is allowed to return fewer bytes than requested, so keep
    // reading until the buffer is full or the stream reports end-of-data (0 bytes).
    int bytesRead = 0;
    while (bytesRead < buffer.Length)
    {
        int chunk = await s.ReadAsync(buffer, bytesRead, buffer.Length - bytesRead, ct);
        if (0 == chunk)
        {
            break;
        }
        bytesRead += chunk;
    }

    if (0 == bytesRead)
    {
        // file is empty
        return Encoding.Default;
    }

    // Hand the sampled bytes back to the caller by rewinding before returning.
    s.Seek(0, SeekOrigin.Begin);
    return GetEncodingFromBuffer(buffer, bytesRead);
}
/// <summary>
/// Guesses the text encoding of the first <paramref name="count"/> bytes of <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Buffer holding the sampled bytes; bytes beyond <paramref name="count"/> are ignored.</param>
/// <param name="count">Number of valid bytes at the start of <paramref name="buffer"/>.</param>
/// <returns>
/// The first encoding whose byte order mark prefixes the data; otherwise UTF-8 or Latin-1 when an
/// encoded form of one of the letters "æøåÆØÅ" is found; otherwise <see cref="Encoding.Default"/>.
/// </returns>
static Encoding GetEncodingFromBuffer(byte[] buffer, int count)
{
    // Only the first `count` bytes were actually read; anything after that is buffer padding
    // and must not take part in the byte order mark comparison.
    ReadOnlySpan<byte> data = buffer.AsSpan(0, count);

    // Strategy 1: Does it start with one of the byte order marks (bom)?
    // Longest preamble first, so a 4-byte mark is tried before a 2-byte mark that is its prefix.
    var encodingsWithBoms = Encoding.GetEncodings()
        .Select(x => x.GetEncoding())
        .Where(x => 0 != x.Preamble.Length)
        .OrderByDescending(x => x.Preamble.Length);
    foreach (Encoding candidate in encodingsWithBoms)
    {
        // StartsWith is false when the data is shorter than the preamble, so short inputs are safe.
        if (data.StartsWith(candidate.Preamble))
        {
            // detected from bom
            return candidate;
        }
    }

    // Strategy 2: Detect from national chars
    var eightBitEncodings = new[] { Encoding.UTF8, Encoding.Latin1 }; // DOS code pages not supported in netcore
    const string NationalChars = "æøåÆØÅ";
    // One (bytes, encoding) pair per letter and encoding; all UTF-8 samples come before the Latin-1 ones.
    var encodingSamples = eightBitEncodings.SelectMany(x => NationalChars.Select(y => (Sample: x.GetBytes(y.ToString()), Encoding: x)));
    foreach (var es in encodingSamples)
    {
        // look for encoding sample anywhere in the bytes that were read
        if (-1 != IndexOf(buffer, es.Sample, count))
        {
            // detected from national chars
            return es.Encoding;
        }
    }

    // Strategy 3: fall back
    return Encoding.Default;
}
/// <summary>
/// Finds the first occurrence of <paramref name="pattern"/> within the first
/// <paramref name="count"/> bytes of <paramref name="array"/>.
/// </summary>
/// <param name="array">Bytes to search.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <param name="count">Number of leading bytes of <paramref name="array"/> to search.</param>
/// <returns>Zero-based index at which the pattern starts, or -1 when it does not occur.</returns>
static int IndexOf(byte[] array, byte[] pattern, int count)
{
    if (0 == pattern.Length)
    {
        // An empty pattern trivially matches at the start.
        return 0;
    }

    // The span search compares the whole pattern at every candidate offset, so a match that
    // begins right after a failed partial match (e.g. {1, 2} inside {1, 1, 2}) is still found.
    ReadOnlySpan<byte> haystack = new ReadOnlySpan<byte>(array, 0, count);
    ReadOnlySpan<byte> needle = new ReadOnlySpan<byte>(pattern);
    return haystack.IndexOf(needle);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment