Skip to content

Instantly share code, notes, and snippets.

@tkMageztik
Last active May 3, 2017 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tkMageztik/442de2f94662cd199b4c2700378b1e03 to your computer and use it in GitHub Desktop.
Save tkMageztik/442de2f94662cd199b4c2700378b1e03 to your computer and use it in GitHub Desktop.
Determinar el encoding de un documento, lectura de bytes.
/// <summary>
/// Determines the encoding of a text file by inspecting its byte order mark (BOM).
/// Falls back to <see cref="Encoding.Default"/> when no known BOM is present.
/// </summary>
/// <param name="filename">Path of the file to inspect.</param>
/// <returns>
/// The encoding indicated by the BOM (UTF-32 LE/BE, UTF-7, UTF-8, UTF-16 LE/BE),
/// or <see cref="Encoding.Default"/> if no BOM is recognized.
/// </returns>
public static Encoding GetEncoding(string filename)
{
    // Read up to the first four bytes; the longest BOM checked below is four bytes.
    var bom = new byte[4];
    int length = 0;
    using (var file = new FileStream(filename, FileMode.Open, FileAccess.Read))
    {
        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the buffer is full or the end of the file is reached.
        int read;
        while (length < bom.Length && (read = file.Read(bom, length, bom.Length - length)) > 0)
        {
            length += read;
        }
    }

    // UTF-32 must be tested before UTF-16: the UTF-32LE BOM (FF FE 00 00)
    // starts with the UTF-16LE BOM (FF FE). The length guard keeps a short
    // file (e.g. just FF FE) from matching on the zero-initialized padding.
    if (length >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
    {
        return Encoding.UTF32; // UTF-32LE
    }

    if (length >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
    {
        // Encoding.UTF32 is little-endian; the 00 00 FE FF mark is big-endian.
        return new UTF32Encoding(bigEndian: true, byteOrderMark: true); // UTF-32BE
    }

    if (length >= 3 && bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
    {
        return Encoding.UTF7;
    }

    if (length >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
    {
        return Encoding.UTF8;
    }

    if (length >= 2 && bom[0] == 0xff && bom[1] == 0xfe)
    {
        return Encoding.Unicode; // UTF-16LE
    }

    if (length >= 2 && bom[0] == 0xfe && bom[1] == 0xff)
    {
        return Encoding.BigEndianUnicode; // UTF-16BE
    }

    // No recognizable BOM: use the platform default encoding.
    return Encoding.Default;
}
//28591 iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO) - FUNCIONA
//1252 windows-1252 ANSI Latin 1; Western European (Windows) - FUNCIONA
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment