Skip to content

Instantly share code, notes, and snippets.

@luiseduardohd
Forked from jlewin/ListDuplicateFiles.cs
Created June 28, 2014 17:26
Show Gist options
  • Save luiseduardohd/e748bb64e15e91f7e4ea to your computer and use it in GitHub Desktop.
Save luiseduardohd/e748bb64e15e91f7e4ea to your computer and use it in GitHub Desktop.
/// <summary>
/// Hashes every file found by <c>Directory.GetFiles</c> for the given directory and
/// prints each set of files that share the same content hash
/// </summary>
/// <param name="directory">The directory whose files are compared</param>
private void ListDuplicateFiles(string directory)
{
    // Pair each file path with the hash of its content (evaluated lazily, once, by the grouping below)
    var hashedFiles =
        from path in Directory.GetFiles(directory)
        select new { Path = path, Hash = FileHash.CalculateFromFile(path) };

    // Bucket the files by hash and keep only the buckets holding more than one file
    var duplicateSets =
        from entry in hashedFiles
        group entry by entry.Hash into sameHash
        where sameHash.Count() > 1
        select sameHash;

    foreach (var sameHash in duplicateSets)
    {
        // Header line carries the shared hash value
        Console.WriteLine("--- Dupe ({0})-------------------------------", sameHash.Key);

        foreach (var entry in sameHash)
        {
            // Only the file name is shown, not the full path
            string fileName = Path.GetFileName(entry.Path);
            Console.WriteLine("\t {0}", fileName);
        }
    }
}
/// <summary>
/// Helper for producing content hashes of files
/// </summary>
public static class FileHash
{
    /// <summary>
    /// Generate an MD5 hash of the given file
    /// </summary>
    /// <param name="filename">The content file</param>
    /// <returns>The MD5 digest of the file content, encoded as a Base64 string</returns>
    public static string CalculateFromFile(string filename)
    {
        // Both the hash algorithm and the file handle are IDisposable; release them deterministically.
        // The file is streamed into the hash rather than read into one array, so large files
        // do not have to fit in memory.
        using (MD5 md5 = MD5.Create())
        using (FileStream stream = File.OpenRead(filename))
        {
            byte[] output = md5.ComputeHash(stream);
            return Convert.ToBase64String(output);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment