Skip to content

Instantly share code, notes, and snippets.

@SamSaffron
Created April 25, 2009 11:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SamSaffron/101591 to your computer and use it in GitHub Desktop.
Save SamSaffron/101591 to your computer and use it in GitHub Desktop.
/// <summary>
/// LINQ-style helper extensions for sequences.
/// </summary>
static class LinqExtension {
    /// <summary>
    /// Returns the items whose zero-based index is a multiple of <paramref name="sample"/>,
    /// i.e. the first item and then every <paramref name="sample"/>-th item after it.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="items">Sequence to sample from; enumerated lazily.</param>
    /// <param name="sample">
    /// Sampling interval. Must be non-zero. The selection test is <c>index % sample == 0</c>,
    /// so a negative interval selects the same indexes as its magnitude.
    /// </param>
    /// <returns>A lazily evaluated sequence of the selected items, in source order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="sample"/> is zero.</exception>
    public static IEnumerable<T> SampleEvery<T>(this IEnumerable<T> items, int sample) {
        // Validate here rather than inside the iterator: an iterator body does not run
        // until the first MoveNext, which would postpone these errors to some later
        // (and harder to diagnose) enumeration site.
        if (items == null) {
            throw new ArgumentNullException("items");
        }
        if (sample == 0) {
            throw new ArgumentOutOfRangeException("sample", sample, "Sampling interval must be non-zero.");
        }
        return SampleEveryIterator(items, sample);
    }

    // Lazily yields every item whose running index is divisible by the interval.
    private static IEnumerable<T> SampleEveryIterator<T>(IEnumerable<T> items, int sample) {
        int i = 0;
        foreach (var item in items) {
            if ((i % sample) == 0) {
                yield return item;
            }
            i++;
        }
    }
}
// Number of random samples GetFileSignature reads from a file that is
// at least SamplingThreshold bytes long.
const int SampleCount = 4;
// Size in bytes of each random file sample (4 KiB); the sample buffer is
// SampleCount * SampleSize bytes.
const int SampleSize = 4 * 1024;
// Files smaller than this many bytes (16 KiB) get no random sampling:
// their entire content is read and hashed instead.
const int SamplingThreshold = 16 * 1024;
/// <summary>
/// Computes a quick signature for a file from its size and a sample of its content.
/// Files smaller than <c>SamplingThreshold</c> are hashed in full; larger files
/// contribute <c>SampleCount</c> chunks of up to <c>SampleSize</c> bytes each, read
/// from pseudo-random offsets. The MD5 of that data followed by the 8-byte file
/// length is returned as a <see cref="Guid"/>.
/// </summary>
/// <param name="filename">Path of the file to fingerprint; opened read-only.</param>
/// <returns>A GUID built from the 16-byte MD5 digest.</returns>
/// <remarks>
/// For sampled files this is not a full-content hash: two files of the same length
/// that differ only outside the sampled regions produce the same signature.
/// </remarks>
public static Guid GetFileSignature(string filename) {
    byte[] buffer;
    long filesize;
    using (var reader = File.Open(filename, FileMode.Open, FileAccess.Read)) {
        filesize = reader.Length;
        if (filesize < SamplingThreshold) {
            // Small file: hash every byte.
            buffer = new byte[filesize];
            Read(reader, buffer, 0, (int)filesize);
        } else {
            // Seed from the file size so that files of equal length are always
            // sampled at the same offsets, keeping the signature repeatable.
            Random random = new Random((int)(filesize % int.MaxValue));
            // Random.Next takes an int, so offsets are drawn from the first
            // Int32.MaxValue bytes of the file at most.
            int maxSize = filesize < (long)Int32.MaxValue ? (int)filesize : Int32.MaxValue;
            // space out random numbers: draw four times as many offsets as needed,
            // sort them, and keep every fourth one.
            var startPositions = Enumerable
                .Range(0, SampleCount * 4)
                .Select(_ => random.Next(maxSize))
                .OrderBy(i => i)
                .SampleEvery(4)
                .ToArray();
            buffer = new byte[SampleCount * SampleSize];
            int bufferPosition = 0;
            foreach (var start in startPositions) {
                // Absolute seek; no need to track the stream position by hand.
                reader.Seek(start, SeekOrigin.Begin);
                // A sample that starts near the end of the file may come back short;
                // the unfilled tail of the buffer stays zero and is still hashed.
                bufferPosition += Read(reader, buffer, bufferPosition, SampleSize);
            }
        }
    }
    // The provider wraps a native crypto handle; dispose it deterministically
    // instead of leaving it for the finalizer.
    using (var md5Provider = new MD5CryptoServiceProvider()) {
        md5Provider.TransformBlock(buffer, 0, buffer.Length, buffer, 0);
        // include the filesize in the hash
        var fileSizeArray = BitConverter.GetBytes(filesize);
        md5Provider.TransformFinalBlock(fileSizeArray, 0, fileSizeArray.Length);
        return new Guid(md5Provider.Hash);
    }
}
/// <summary>
/// Fills <paramref name="buffer"/> from the stream, calling <c>FileStream.Read</c>
/// repeatedly because a single call may return fewer bytes than requested.
/// </summary>
/// <param name="reader">Stream to read from, starting at its current position.</param>
/// <param name="buffer">Destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> where the first byte is stored.</param>
/// <param name="count">Maximum number of bytes to read.</param>
/// <returns>
/// The number of bytes actually stored; less than <paramref name="count"/> only when
/// a read returned no data (end of stream) first.
/// </returns>
private static int Read(FileStream reader, byte[] buffer, int offset, int count) {
    int filled = 0;
    while (true) {
        int chunk = reader.Read(buffer, offset + filled, count - filled);
        filled += chunk;
        // Stop once the request is satisfied or the stream has nothing more to give.
        if (chunk <= 0 || filled >= count) {
            return filled;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment