Skip to content

Instantly share code, notes, and snippets.

@tompazourek
Forked from nramsbottom/Program.cs
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tompazourek/3be6ec63fdc4b2df3e08 to your computer and use it in GitHub Desktop.
Save tompazourek/3be6ec63fdc4b2df3e08 to your computer and use it in GitHub Desktop.
DetectUTFBom
//
// Finds and displays the filenames of all files that start with the UTF-8
// byte order mark (EF BB BF) in a specified directory tree.
//
// based on information from the following URL
//
// http://stackoverflow.com/questions/4520184/how-to-detect-the-character-encoding-of-a-text-file
//
using System;
using System.IO;
using System.Linq;
namespace DetectUTFBom
{
    /// <summary>
    /// Console tool that lists every file under a directory tree whose content
    /// starts with the UTF-8 byte order mark (EF BB BF).
    /// </summary>
    class Program
    {
        // The three bytes that make up the UTF-8 byte order mark.
        private static readonly byte[] Utf8Bom = { 0xEF, 0xBB, 0xBF };

        /// <summary>
        /// Scans the directory given as the first argument (recursively) and
        /// prints the path of each file that starts with the UTF-8 BOM,
        /// followed by a blank line and the number of matches.
        /// </summary>
        /// <param name="args">Command-line arguments; args[0] is the directory to scan.</param>
        /// <returns>
        /// 0 when the scan completed, 1 when no directory argument was supplied,
        /// 2 when the supplied directory does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Usage: DetectUTFBOM <directory>");
                return 1;
            }

            var directoryToScan = args[0];
            if (!Directory.Exists(directoryToScan))
            {
                Console.WriteLine("Directory does not exist.");
                return 2;
            }

            var allFiles = Directory.GetFiles(directoryToScan,
                                              "*.*",
                                              SearchOption.AllDirectories);

            // Where() is lazily evaluated: materialize the result once so that
            // printing the names and counting them do not each re-open every
            // file, and so the reported count always matches the printed list.
            var filesWithBom = allFiles.Where(FileHasByteOrderMark).ToList();

            foreach (var filename in filesWithBom)
            {
                Console.WriteLine(filename);
            }

            Console.WriteLine();
            Console.WriteLine("{0} file(s) found.", filesWithBom.Count);
            return 0;
        }

        /// <summary>
        /// Determines whether the given file starts with the UTF-8 byte order mark.
        /// </summary>
        /// <param name="filename">Path of the file to inspect.</param>
        /// <returns>
        /// true when the first three bytes of the file are EF BB BF; false
        /// otherwise, including when the file is shorter than three bytes.
        /// </returns>
        private static bool FileHasByteOrderMark(string filename)
        {
            using (var s = File.Open(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                var header = new byte[Utf8Bom.Length];
                var filled = 0;

                // Stream.Read may return fewer bytes than requested, so keep
                // reading until the header is complete or the stream ends.
                while (filled < header.Length)
                {
                    var read = s.Read(header, filled, header.Length - filled);
                    if (read == 0)
                    {
                        return false; // file is too short to contain a BOM
                    }

                    filled += read;
                }

                return header.SequenceEqual(Utf8Bom);
            }
        } // FileHasByteOrderMark
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment