Skip to content

Instantly share code, notes, and snippets.

@pawitp
Created October 24, 2021 03:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pawitp/85ae2e3c833955929d03350b93ca3608 to your computer and use it in GitHub Desktop.
Save pawitp/85ae2e3c833955929d03350b93ca3608 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Splits a file into variable-sized chunks using an MD5 hash of a sliding window.
 *
 * <p>The whole file is read into memory. A window of {@link #BLOCK_SIZE} bytes is moved
 * over it one byte at a time; whenever the MD5 digest of the window matches the split
 * condition (see {@link #shouldSplit}), a chunk boundary is placed at the window's last
 * byte. For every chunk one line {@code <index of last byte>,<chunk length>} is printed to
 * standard output, followed by the elapsed chunking time (file reading is not included).
 *
 * <p>Usage: {@code java Md5Chunking [file]}. Without an argument the default input file
 * name is used.
 */
public class Md5Chunking {
    /** Input file used when no file name is given on the command line. */
    private static final String DEFAULT_INPUT = "ubuntu-20.04.3-live-server-amd64.iso";

    /** Number of bytes in the sliding window that is hashed at each position. */
    private static final int BLOCK_SIZE = 63;

    /** Shared digest instance; initialized in {@link #main} and reused for every window. */
    private static MessageDigest md;

    /**
     * Chunks the input file and prints the chunk boundaries and the elapsed time.
     *
     * @param args optional; {@code args[0]} is the file to chunk
     * @throws Exception if MD5 is unavailable or the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        // Init
        md = MessageDigest.getInstance("MD5");

        // Read the entire file to memory for simplicity
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        byte[] input = readInput(filename);

        long startTime = System.nanoTime();

        // Start at the first position where a full BLOCK_SIZE window is available
        int currentPos = BLOCK_SIZE - 1;
        int currentStartPos = 0;
        while (currentPos < input.length) {
            if (shouldSplit(input, currentPos)) {
                System.out.println(currentPos + "," + (currentPos - currentStartPos + 1));
                currentStartPos = currentPos + 1;
                // Jump ahead so the next window lies entirely inside the new chunk
                currentPos += BLOCK_SIZE;
            } else {
                currentPos++;
            }
        }

        // Last chunk: only report it when bytes remain after the final split. Without this
        // check an empty input, or a split landing on the very last byte, would print a
        // bogus zero-length chunk.
        if (currentStartPos < input.length) {
            currentPos = input.length - 1;
            System.out.println(currentPos + "," + (currentPos - currentStartPos + 1));
        }

        long endTime = System.nanoTime();
        System.out.println("Finished in " + (endTime - startTime) / 1000000 + " ms");
    }

    /**
     * Reads the complete content of a file into a byte array.
     *
     * @param filename path of the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened, is too large to be held in a single
     *     array, or ends before the expected number of bytes has been read
     */
    private static byte[] readInput(String filename) throws IOException {
        File file = new File(filename);
        try (FileInputStream fis = new FileInputStream(file)) {
            long len = file.length();
            // Reject sizes the (int) cast below would silently truncate. Leaving BLOCK_SIZE
            // of headroom also keeps "currentPos += BLOCK_SIZE" in main from overflowing.
            if (len > Integer.MAX_VALUE - BLOCK_SIZE) {
                throw new IOException(
                        "File too large to read into memory: " + filename + " (" + len + " bytes)");
            }
            byte[] content = new byte[(int) len];
            // A single read() may return fewer bytes than requested, so keep reading until
            // the buffer is full or the stream ends early.
            int offset = 0;
            while (offset < content.length) {
                int count = fis.read(content, offset, content.length - offset);
                if (count < 0) {
                    throw new IOException("Unexpected bytes read");
                }
                offset += count;
            }
            return content;
        }
    }

    /**
     * Decides whether a chunk boundary is placed at {@code pos}.
     *
     * <p>Hashes the {@link #BLOCK_SIZE} bytes ending at {@code pos} (inclusive) with MD5 and
     * checks 23 bits of the digest.
     *
     * @param b the input data
     * @param pos index of the last byte of the window; must be at least {@code BLOCK_SIZE - 1}
     * @return {@code true} if the digest of the window satisfies the split condition
     */
    private static boolean shouldSplit(byte[] b, int pos) {
        md.reset();
        md.update(b, pos - (BLOCK_SIZE - 1), BLOCK_SIZE);
        byte[] result = md.digest();
        // 23 bits must be 0: the first 2 bytes and the low 7 bits of the third byte
        // (the mask leaves the most significant bit of the third byte unchecked).
        return result[0] == 0 && result[1] == 0 && (result[2] & 0b1111111) == 0;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment