Created
January 11, 2010 00:14
-
-
Save tiernano/273880 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.IO; | |
using System.Security.Cryptography; | |
using System.Diagnostics; | |
using System.Collections; | |
namespace Dedupe | |
{ | |
/// <summary>
/// Console entry point: walks a directory tree, feeds every file to a
/// <see cref="dedupe"/> instance and prints block/duplicate totals.
/// </summary>
class Program
{
    /// <summary>
    /// Expects exactly one argument: the directory to scan (recursively).
    /// Waits for Enter before exiting so the output stays visible.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the directory to test.</param>
    static void Main(string[] args)
    {
        int blockSize = 64 * 1024 * 1024;
        dedupe d = new dedupe(blockSize);

        if (args.Length == 1)
        {
            string directory = args[0];
            if (Directory.Exists(directory))
            {
                foreach (string s in Directory.GetFiles(directory, "*.*", SearchOption.AllDirectories))
                {
                    Console.WriteLine("Checking {0}", s);
                    d.DupeCheck(s);
                }

                // Widen to long before multiplying: with 64 MiB blocks an int
                // product overflows as soon as there are 32 duplicate blocks.
                // The figure is an upper bound because it counts every
                // duplicate as a full block.
                long estimatedSavings = (long)blockSize * d.dupes;
                Console.WriteLine("Total Blocks {0}. Total Dupes {1}. Est savings {2}", d.blocks(), d.dupes, estimatedSavings);
            }
            else
            {
                // Report a bad path instead of crashing with DirectoryNotFoundException.
                Console.WriteLine("directory not found: {0}", directory);
            }
        }
        else
        {
            Console.WriteLine("need to enter a directory you want tested");
        }

        Console.ReadLine();
    }
}
/// <summary>
/// Estimates block-level duplication across files. Each file is cut into
/// fixed-size blocks, every block is hashed with SHA-512, and a block whose
/// digest has been seen before (in this or any earlier file) counts as a dupe.
/// Instances are not safe for concurrent use.
/// </summary>
class dedupe
{
    // Block size used by the parameterless constructor (64 MiB).
    const int DefaultBlockSize = 64 * 1024 * 1024;

    // Hex-encoded digests of every distinct block seen so far. Only the
    // digest is kept; retaining the block bytes would hold all unique data
    // in memory.
    readonly HashSet<string> t = new HashSet<string>();

    readonly int _blockSize;

    // Scratch buffer reused for every block; allocated on first use.
    byte[] _buffer;

    /// <summary>Number of blocks whose digest had already been recorded.</summary>
    public int dupes;

    /// <summary>Returns the number of distinct block digests recorded so far.</summary>
    public int blocks()
    {
        return t.Count;
    }

    /// <summary>Creates a checker that uses the default 64 MiB block size.</summary>
    public dedupe()
        : this(DefaultBlockSize)
    {
    }

    /// <summary>Creates a checker that uses the given block size.</summary>
    /// <param name="blockSize">Block size in bytes; must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="blockSize"/> is zero or negative.
    /// </exception>
    public dedupe(int blockSize)
    {
        if (blockSize <= 0)
        {
            throw new ArgumentOutOfRangeException("blockSize", "Block size must be greater than zero.");
        }

        _blockSize = blockSize;
    }

    /// <summary>
    /// Reads <paramref name="filename"/> block by block, incrementing
    /// <see cref="dupes"/> for every block already seen and recording the
    /// digest of every new one. The final block of a file may be shorter than
    /// the block size; only the bytes actually read are hashed. An empty file
    /// contributes no blocks.
    /// </summary>
    /// <param name="filename">Path of the file to examine.</param>
    public void DupeCheck(string filename)
    {
        if (_buffer == null)
        {
            _buffer = new byte[_blockSize];
        }

        // The using statement closes the file handle even if reading or hashing throws.
        using (FileStream fs = File.OpenRead(filename))
        {
            int filled;
            while ((filled = FillBlock(fs, _buffer)) > 0)
            {
                string hash = GetHash(_buffer, filled);

                // HashSet.Add returns false when the digest is already present.
                if (!t.Add(hash))
                {
                    dupes++;
                }
            }
        }
    }

    // Fills block from source until it is full or the stream ends, and returns
    // the number of bytes stored. Stream.Read may return fewer bytes than
    // requested, so it has to be called in a loop.
    private static int FillBlock(Stream source, byte[] block)
    {
        int filled = 0;
        while (filled < block.Length)
        {
            int read = source.Read(block, filled, block.Length - filled);
            if (read == 0)
            {
                break; // end of stream
            }

            filled += read;
        }

        return filled;
    }

    // Returns the lowercase hex SHA-512 digest of the first count bytes of data.
    private static string GetHash(byte[] data, int count)
    {
        using (SHA512 sha = SHA512.Create())
        {
            byte[] digest = sha.ComputeHash(data, 0, count);
            StringBuilder sb = new StringBuilder(digest.Length * 2);
            foreach (byte by in digest)
            {
                sb.Append(by.ToString("x2"));
            }

            return sb.ToString();
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment