Created
November 16, 2012 21:57
-
-
Save noblethrasher/4091269 to your computer and use it in GitHub Desktop.
Finds duplicate files on a Windows system and generates a simple report.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Text; | |
using System.IO; | |
using System.Security.Cryptography; | |
using System.Diagnostics; | |
namespace DuplicateFileFinder | |
{ | |
/// <summary>
/// A byte count together with the unit (bytes, KB, MB or GB) used to print it.
/// Instances are obtained through <see cref="GetAppropriateUnit"/>.
/// </summary>
abstract class DataUnit
{
    private const long BytesPerKilobyte = 1024L;
    private const long BytesPerMegabyte = 1024L * BytesPerKilobyte;
    private const long BytesPerGigabyte = 1024L * BytesPerMegabyte;

    /// <summary>The size in bytes, exactly as passed to the constructor.</summary>
    protected readonly long length;

    /// <summary>Stores the byte count to be formatted.</summary>
    /// <param name="length">Size in bytes.</param>
    public DataUnit(long length)
    {
        this.length = length;
    }

    /// <summary>
    /// Picks the largest unit that <paramref name="length"/> reaches.
    /// </summary>
    /// <param name="length">Size in bytes.</param>
    /// <returns>
    /// A unit whose <c>ToString</c> prints the size as whole bytes, KB, MB or GB.
    /// </returns>
    public static DataUnit GetAppropriateUnit(long length)
    {
        // Walk the thresholds from smallest to largest; anything below 1024
        // (including zero and negative values) is reported in bytes.
        if (length < BytesPerKilobyte)
        {
            return new Byte (length);
        }

        if (length < BytesPerMegabyte)
        {
            return new Kilobyte (length);
        }

        if (length < BytesPerGigabyte)
        {
            return new Megabyte (length);
        }

        return new Gigabyte (length);
    }

    // Formats the size as a whole number of units; the integer division
    // discards any fractional part.
    private string InWholeUnits(long bytesPerUnit, string suffix)
    {
        var whole = length / bytesPerUnit;

        return whole.ToString () + suffix;
    }

    /// <summary>Prints the raw byte count, with a singular noun for exactly one byte.</summary>
    sealed class Byte : DataUnit
    {
        public Byte(long length) : base (length) { }

        public override string ToString()
        {
            string noun;

            if (length == 1)
            {
                noun = " byte";
            }
            else
            {
                noun = " bytes";
            }

            return length.ToString () + noun;
        }
    }

    /// <summary>Prints the size in whole kilobytes (1024 bytes each).</summary>
    sealed class Kilobyte : DataUnit
    {
        public Kilobyte(long length) : base (length) { }

        public override string ToString()
        {
            return InWholeUnits (BytesPerKilobyte, " KB");
        }
    }

    /// <summary>Prints the size in whole megabytes (1024 * 1024 bytes each).</summary>
    sealed class Megabyte : DataUnit
    {
        public Megabyte(long length) : base (length) { }

        public override string ToString()
        {
            return InWholeUnits (BytesPerMegabyte, " MB");
        }
    }

    /// <summary>Prints the size in whole gigabytes (1024 * 1024 * 1024 bytes each).</summary>
    sealed class Gigabyte : DataUnit
    {
        public Gigabyte(long length) : base (length) { }

        public override string ToString()
        {
            return InWholeUnits (BytesPerGigabyte, " GB");
        }
    }
}
/// <summary>
/// A duration in whole seconds together with the unit (seconds through months)
/// used to print it. Instances are obtained through
/// <see cref="GetAppropriateUnit"/>.
/// </summary>
abstract class TimeUnit
{
    /// <summary>The duration in seconds, exactly as passed to the constructor.</summary>
    protected readonly int length;

    // Unit sizes expressed in seconds. A month is fixed at 30 days.
    protected const int MINUTE = 60;
    protected const int HOUR = 60 * MINUTE;
    protected const int DAY = 24 * HOUR;
    protected const int WEEK = 7 * DAY;
    protected const int MONTH = 30 * DAY;

    /// <summary>Stores the duration to be formatted.</summary>
    /// <param name="length">Duration in seconds.</param>
    public TimeUnit(int length)
    {
        this.length = length;
    }

    /// <summary>
    /// Picks the largest unit that <paramref name="length"/> reaches.
    /// </summary>
    /// <param name="length">Duration in seconds.</param>
    /// <returns>
    /// A unit whose <c>ToString</c> prints the duration as a whole number of
    /// months, weeks, days, hours, minutes or seconds.
    /// </returns>
    public static TimeUnit GetAppropriateUnit(int length)
    {
        // Largest unit first; anything under a minute (including zero and
        // negative values) is reported in seconds.
        if (length >= MONTH)
            return new Month (length);

        if (length >= WEEK)
            return new Week (length);

        if (length >= DAY)
            return new Day (length);

        if (length >= HOUR)
            return new Hour (length);

        if (length >= MINUTE)
            return new Minute (length);

        return new Second (length);
    }

    /// <summary>
    /// Misspelled original name of <see cref="GetAppropriateUnit"/>, kept so
    /// existing callers continue to compile. Behaves identically.
    /// </summary>
    /// <param name="length">Duration in seconds.</param>
    /// <returns>The same result as <see cref="GetAppropriateUnit"/>.</returns>
    public static TimeUnit GetApproriateUnit(int length)
    {
        return GetAppropriateUnit (length);
    }

    /// <summary>Every unit must supply its own textual form.</summary>
    public abstract override string ToString();

    /// <summary>
    /// Formats the duration as a whole number of the given unit, appending
    /// "s" to the name unless the count is exactly one.
    /// </summary>
    /// <param name="denomination">Size of the unit in seconds.</param>
    /// <param name="name">Singular name of the unit.</param>
    /// <returns>Text such as "1 minute" or "3 hours".</returns>
    protected string Report(int denomination, string name)
    {
        // Integer division: any remainder below one whole unit is dropped.
        var k = length / denomination;

        return k + " " + name + (k == 1 ? "" : "s");
    }

    sealed class Month : TimeUnit
    {
        public Month(int length) : base (length) { }

        public override string ToString()
        {
            return Report (MONTH, "month");
        }
    }

    sealed class Week : TimeUnit
    {
        public Week(int length) : base (length) { }

        public override string ToString()
        {
            return Report (WEEK, "week");
        }
    }

    sealed class Day : TimeUnit
    {
        public Day(int length) : base (length) { }

        public override string ToString()
        {
            return Report (DAY, "day");
        }
    }

    sealed class Hour : TimeUnit
    {
        public Hour(int length) : base (length) { }

        public override string ToString()
        {
            return Report (HOUR, "hour");
        }
    }

    sealed class Minute : TimeUnit
    {
        public Minute(int length) : base (length) { }

        public override string ToString()
        {
            return Report (MINUTE, "minute");
        }
    }

    sealed class Second : TimeUnit
    {
        public Second(int length) : base (length) { }

        public override string ToString()
        {
            return Report (1, "second");
        }
    }
}
/// <summary>
/// Console application that hashes every file under a directory with MD5,
/// groups the files by hash, and appends a list of the duplicates to
/// "Duplicate File Report.txt" on the current user's desktop.
/// </summary>
class Program
{
    // One MD5 instance shared by every hashing call in this class.
    static readonly MD5 md5 = MD5.Create ();

    // Base64 MD5 hash -> the group of files whose contents produced that hash.
    static Dictionary<string, FileGroup> files = new Dictionary<string, FileGroup> ();

    // 80 dashes, written under the report heading.
    static readonly string border = new string ('-', 80);

    /// <summary>
    /// All files found so far that share one content hash. Two groups are
    /// equal when their hashes are equal.
    /// </summary>
    class FileGroup : IEquatable<FileGroup>
    {
        /// <summary>Base64 MD5 hash shared by the group; null if hashing failed in the constructor.</summary>
        public readonly string Hash;

        /// <summary>Members of the group, in the order they were added.</summary>
        public readonly List<FileInfo> files = new List<FileInfo> ();

        /// <summary>"path message" entries for files and directories that could not be read.</summary>
        public readonly static List<string> problem_files = new List<string> ();

        /// <summary>
        /// Hashes <paramref name="info"/> and starts a group with it. On failure
        /// the error is recorded in <see cref="problem_files"/> and the group is
        /// left empty with a null hash.
        /// </summary>
        public FileGroup(FileInfo info)
        {
            try
            {
                // Hashing only reads the file, so ask for read access only;
                // requesting write access fails on read-only files.
                using (var fs = new FileStream (info.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                    Hash = Convert.ToBase64String (md5.ComputeHash (fs));

                files.Add (info);
            }
            catch (Exception ex)
            {
                problem_files.Add (info.FullName + " " + ex.Message);
            }
        }

        /// <summary>Starts a group from an already computed hash and its first file.</summary>
        public FileGroup(string hash, FileInfo info)
        {
            this.Hash = hash;
            files.Add (info);
        }

        // Creates an empty group carrying only a hash; used by the implicit
        // conversion from string.
        private FileGroup(string hash)
        {
            this.Hash = hash;
        }

        /// <summary>Adds another file with the same content to the group.</summary>
        public void Add(FileInfo info)
        {
            files.Add (info);
        }

        /// <summary>
        /// Bytes occupied by the redundant copies: every member except the
        /// first, each counted at the first member's length. Zero for an
        /// empty group.
        /// </summary>
        public long ExcessSize
        {
            get
            {
                if (files.Count == 0)
                    return 0;

                return (files.Count - 1) * files[0].Length;
            }
        }

        /// <summary>Hash code derived from <see cref="Hash"/> so it agrees with Equals.</summary>
        public override int GetHashCode()
        {
            return Hash == null ? 0 : Hash.GetHashCode ();
        }

        /// <summary>True when <paramref name="obj"/> is a group with the same hash.</summary>
        public override bool Equals(object obj)
        {
            return Equals (obj as FileGroup);
        }

        /// <summary>True when <paramref name="other"/> is non-null and has the same hash.</summary>
        public bool Equals(FileGroup other)
        {
            return !ReferenceEquals (other, null) && other.Hash == this.Hash;
        }

        /// <summary>Wraps a hash string in an empty group.</summary>
        public static implicit operator FileGroup(string s)
        {
            return new FileGroup (s);
        }
    }

    /// <summary>
    /// Prompts for a directory and search patterns, scans, prints a summary,
    /// writes the report, and repeats while the user answers with "y".
    /// </summary>
    static void Main(string[] args)
    {
        var stopwatch = new Stopwatch ();
        string disposition;

        do
        {
            stopwatch.Reset ();
            Console.Clear ();
            FileGroup.problem_files.Clear ();

            // Start every scan from an empty index so groups from a previously
            // scanned directory are not reported under the new one.
            files.Clear ();

            Console.WriteLine ("Directory:");
            var root = Console.ReadLine ();

            if (Directory.Exists (root))
            {
                // Ask for the patterns before starting the clock so the
                // reported time covers the scan, not the user's typing.
                var patterns = ReadPatterns ();

                stopwatch.Start ();
                GetFiles (root, patterns);
                stopwatch.Stop ();

                var time_taken = TimeUnit.GetApproriateUnit ((int)(stopwatch.ElapsedMilliseconds / 1000)).ToString ();

                // The dictionary has one entry per distinct hash, so the number
                // of files scanned is the sum of the group sizes.
                var scanned = 0;
                var dupes = new List<FileGroup> ();

                foreach (var grp in files.Values)
                {
                    scanned += grp.files.Count;

                    if (grp.files.Count > 1)
                        dupes.Add (grp);
                }

                Console.WriteLine ();
                Console.WriteLine ("Scanned " + scanned + " files in " + time_taken);
                Console.WriteLine (dupes.Count + " files with duplicates");

                if (dupes.Count > 0)
                    WriteReport (root, dupes);

                var problems = FileGroup.problem_files.Count;

                if (problems > 0)
                    Console.WriteLine ("There was a problem reading or opening " + problems + " " + (problems > 1 ? "files or directories" : "file or directory"));
            }
            else
            {
                Console.WriteLine ("Cannot find that directory (it might be misspelled).");
                Console.WriteLine ();
            }

            Console.WriteLine ("Search other directories (y/n)?");

            // ReadLine returns null at end of input; treat that as "no".
            disposition = Console.ReadLine ();
        } while (disposition != null && disposition.StartsWith ("y", StringComparison.OrdinalIgnoreCase));
    }

    // Appends one report section for this scan to the desktop report file:
    // a heading, each duplicate group (first file, then its copies indented
    // with a tab) and the total size of the redundant copies.
    static void WriteReport(string root, List<FileGroup> dupes)
    {
        var desktop_path = Environment.GetFolderPath (Environment.SpecialFolder.Desktop);

        using (var sw = new StreamWriter (Path.Combine (desktop_path, "Duplicate File Report.txt"), true))
        {
            // Parenthesised so that ToUpper applies to the whole heading
            // rather than only to the closing ")" literal.
            sw.WriteLine (("Duplicate files in " + root + " (" + DateTime.Now.ToString () + ")").ToUpper ());
            sw.WriteLine (border);
            sw.WriteLine ();

            long total_excess = 0;

            foreach (var dupe in dupes)
            {
                total_excess += dupe.ExcessSize;

                sw.WriteLine (dupe.files[0].FullName);

                for (var i = 1; i < dupe.files.Count; i++)
                    sw.WriteLine ("\t" + dupe.files[i].FullName);

                sw.WriteLine ();
            }

            sw.WriteLine ();
            sw.WriteLine ("Excess: " + DataUnit.GetAppropriateUnit (total_excess));
            sw.WriteLine ();
        }
    }

    // Prompts for a semicolon-separated list of search patterns. Surrounding
    // whitespace and empty entries are dropped; no usable input means "*".
    static string[] ReadPatterns()
    {
        Console.WriteLine ("Pattern:");

        var input = Console.ReadLine () ?? "";
        var patterns = new List<string> ();

        foreach (var part in input.Split (new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
        {
            var trimmed = part.Trim ();

            if (trimmed.Length > 0)
                patterns.Add (trimmed);
        }

        if (patterns.Count == 0)
            patterns.Add ("*");

        return patterns.ToArray ();
    }

    /// <summary>
    /// Prompts for search patterns, then hashes every matching file under
    /// <paramref name="root"/> and its subdirectories.
    /// </summary>
    /// <param name="root">Directory to start from.</param>
    static void GetFiles(string root)
    {
        GetFiles (root, ReadPatterns ());
    }

    // Walks the tree below root with an explicit stack and hashes every file
    // that matches at least one pattern.
    static void GetFiles(string root, string[] patterns)
    {
        var stack = new Stack<DirectoryInfo> ();
        stack.Push (new DirectoryInfo (root));

        var default_color = Console.ForegroundColor;

        while (stack.Count > 0)
        {
            var dir = stack.Pop ();

            try
            {
                // A file can match more than one pattern (e.g. "*.txt;*");
                // hash it once so it is not reported as a duplicate of itself.
                var seen = new HashSet<string> (StringComparer.Ordinal);

                foreach (var pattern in patterns)
                {
                    foreach (var file in dir.GetFiles (pattern))
                    {
                        if (seen.Add (file.FullName))
                            HashFile (file, default_color);
                    }
                }

                // Pushed in reverse so the subdirectories are popped in the
                // order GetDirectories returned them.
                foreach (var sub in Utils.Reverse (dir.GetDirectories ()))
                {
                    stack.Push (sub);
                }
            }
            catch (Exception ex)
            {
                // Record the directory instead of silently skipping it, so the
                // summary in Main can count it.
                FileGroup.problem_files.Add (dir.FullName + " " + ex.Message);
            }
        }
    }

    // Hashes one file, files it under its hash and echoes "path hash" to the
    // console (in red when the hash was already known). Any failure is
    // recorded in FileGroup.problem_files.
    static void HashFile(FileInfo file, ConsoleColor default_color)
    {
        try
        {
            string hash;

            using (var sr = file.Open (FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                hash = Convert.ToBase64String (md5.ComputeHash (sr));

            FileGroup existing;
            var is_duplicate = files.TryGetValue (hash, out existing);

            if (is_duplicate)
                existing.Add (file);
            else
                files.Add (hash, new FileGroup (hash, file));

            if (is_duplicate)
                Console.ForegroundColor = ConsoleColor.Red;

            try
            {
                Console.WriteLine (file.FullName + " " + hash);
            }
            finally
            {
                // Always restore the colour, even if the write fails.
                Console.ForegroundColor = default_color;
            }
        }
        catch (Exception ex)
        {
            FileGroup.problem_files.Add (file.FullName + " " + ex.Message);
        }
    }
}
/// <summary>Small collection helpers.</summary>
public static class Utils
{
    /// <summary>
    /// Returns the elements of <paramref name="xs"/> in reverse order without
    /// modifying the source.
    /// </summary>
    /// <typeparam name="T">Element type.</typeparam>
    /// <param name="xs">Sequence to reverse.</param>
    /// <returns>
    /// A reversed copy for reference-type arrays and for lists; otherwise a
    /// sequence that reads the source each time it is enumerated and yields
    /// the elements last to first.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="xs"/> is null.</exception>
    public static IEnumerable<T> Reverse<T>(IEnumerable<T> xs)
    {
        if (xs == null)
            throw new ArgumentNullException ("xs");

        // Arrays of reference types: copy into a fresh T[] and reverse the copy.
        if (xs is T[] && (!typeof (T).IsValueType))
        {
            var array = (T[]) xs;
            var copy = new T[array.Length];

            Array.Copy (array, copy, array.Length);
            Array.Reverse (copy);

            return copy;
        }

        // Lists: reverse a copy so the caller's list is left untouched.
        var list = xs as List<T>;

        if (list != null)
        {
            var copy = new List<T> (list);
            copy.Reverse ();

            return copy;
        }

        return ReverseLazily (xs);
    }

    // Buffers the source into a stack on every enumeration; enumerating a
    // stack yields the most recently pushed element first. Taking the
    // sequence rather than a single pre-fetched enumerator keeps the result
    // correct when it is enumerated more than once.
    private static IEnumerable<T> ReverseLazily<T>(IEnumerable<T> xs)
    {
        foreach (var t in new Stack<T> (xs))
            yield return t;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Just a simple console app for finding duplicate files (based on the MD5 signature).