public
Last active

  • Download Gist
gistfile1.cs
C#
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
static class LinqExtension {
    /// <summary>
    /// Lazily yields the elements of <paramref name="items"/> whose zero-based position
    /// is a multiple of <paramref name="sample"/>; the first element is always yielded.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="items">Sequence to thin out.</param>
    /// <param name="sample">Step between yielded elements (1 yields every element).</param>
    /// <returns>A deferred sequence containing every <paramref name="sample"/>-th element.</returns>
    public static IEnumerable<T> SampleEvery<T>(this IEnumerable<T> items, int sample) {
        int position = 0;
        using (var cursor = items.GetEnumerator()) {
            while (cursor.MoveNext()) {
                // Keep positions 0, sample, 2 * sample, ...
                bool keep = (position % sample) == 0;
                if (keep) {
                    yield return cursor.Current;
                }
                position++;
            }
        }
    }
}
 
// Number of samples GetFileSignature reads from a file that is not read in full.
const int SampleCount = 4;
 
// Size in bytes of each random file sample; at most this many bytes are read per start position.
const int SampleSize = 4 * 1024;
// Files smaller than this many bytes get no random sampling: their whole content is read instead.
const int SamplingThreshold = 16 * 1024;
 
/// <summary>
/// Computes a quick signature for a file without necessarily reading all of it.
/// </summary>
/// <remarks>
/// Files shorter than <c>SamplingThreshold</c> bytes are read completely. Longer files
/// contribute <c>SampleCount</c> chunks of up to <c>SampleSize</c> bytes each, taken at
/// pseudo-random offsets. The generator is seeded from the file length, so a file of a
/// given length is always sampled at the same offsets. The collected bytes followed by
/// the file length are hashed with MD5 and the digest is returned as a <see cref="Guid"/>.
/// Because a long file is only sampled, files of equal length that differ solely outside
/// the sampled regions produce the same signature.
/// </remarks>
/// <param name="filename">Path of the file to fingerprint.</param>
/// <returns>The MD5 digest of the sampled bytes and the file length, wrapped in a Guid.</returns>
public static Guid GetFileSignature(string filename) {

    byte[] buffer;
    long filesize;

    // FileShare.Read: computing a read-only signature must not demand exclusive access,
    // which the three-argument File.Open overload would do.
    using (var reader = File.Open(filename, FileMode.Open, FileAccess.Read, FileShare.Read)) {

        filesize = reader.Length;

        if (filesize < SamplingThreshold) {
            buffer = new byte[filesize];
            Read(reader, buffer, 0, (int)filesize);
        } else {

            // Seeding with the length makes the chosen offsets reproducible per file size.
            Random random = new Random((int)(filesize % int.MaxValue));

            // Random.Next takes an int bound, so offsets are capped at Int32.MaxValue.
            int maxSize = filesize < (long)Int32.MaxValue ? (int)filesize : Int32.MaxValue;

            // space out random numbers: draw four times as many as needed, sort them,
            // and keep every fourth one
            var startPositions = Enumerable
                .Range(0, SampleCount * 4)
                .Select(_ => random.Next(maxSize))
                .OrderBy(i => i)
                .SampleEvery(4)
                .ToArray();

            // A sample that runs into the end of the file comes back short; the unused
            // tail of the buffer stays zero and is hashed as is.
            buffer = new byte[SampleCount * SampleSize];
            int bufferPosition = 0;

            foreach (var start in startPositions) {
                // Absolute seek: no need to track the stream position by hand.
                reader.Seek(start, SeekOrigin.Begin);
                bufferPosition += Read(reader, buffer, bufferPosition, SampleSize);
            }
        }
    }

    // The hash provider is IDisposable; release it deterministically.
    using (var md5Provider = new MD5CryptoServiceProvider()) {
        md5Provider.TransformBlock(buffer, 0, buffer.Length, buffer, 0);
        // include the filesize in the hash
        var fileSizeArray = BitConverter.GetBytes(filesize);
        md5Provider.TransformFinalBlock(fileSizeArray, 0, fileSizeArray.Length);

        return new Guid(md5Provider.Hash);
    }
}
 
/// <summary>
/// Reads from <paramref name="reader"/> into <paramref name="buffer"/> until
/// <paramref name="count"/> bytes have been stored or a read returns no data.
/// </summary>
/// <param name="reader">Stream to read from, starting at its current position.</param>
/// <param name="buffer">Destination array.</param>
/// <param name="offset">Index in <paramref name="buffer"/> where the first byte is stored.</param>
/// <param name="count">Maximum number of bytes to read.</param>
/// <returns>The number of bytes actually stored, which is less than <paramref name="count"/>
/// when the stream ran out of data first.</returns>
private static int Read(FileStream reader, byte[] buffer, int offset, int count) {
    int filled = 0;
    while (true) {
        // One Read call may deliver fewer bytes than requested, so keep asking for the rest.
        int chunk = reader.Read(buffer, offset + filled, count - filled);
        filled += chunk;
        if (chunk <= 0 || filled >= count) {
            return filled;
        }
    }
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.