Created
October 24, 2021 03:32
-
-
Save pawitp/85ae2e3c833955929d03350b93ca3608 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
/**
 * Splits a file into content-defined chunks using an MD5 hash over a sliding window.
 *
 * <p>For every candidate position the MD5 digest of the {@code BLOCK_SIZE} bytes ending at
 * that position is computed; when 23 selected bits of the digest are all zero the position
 * becomes the last byte of a chunk. Each chunk is reported on standard output as
 * {@code <index of last byte>,<chunk length>}, followed by the elapsed time.
 *
 * <p>Usage: {@code java Md5Chunking [file]}. When no file is given, the default input
 * {@code ubuntu-20.04.3-live-server-amd64.iso} in the working directory is used.
 */
public class Md5Chunking {

    /** Number of bytes in the window that is hashed to decide whether to split. */
    private static final int BLOCK_SIZE = 63;

    /** Input file used when no file name is passed on the command line. */
    private static final String DEFAULT_INPUT = "ubuntu-20.04.3-live-server-amd64.iso";

    /**
     * Largest input accepted. Keeping BLOCK_SIZE of headroom below Integer.MAX_VALUE
     * guarantees that advancing the position by BLOCK_SIZE can never overflow an int.
     */
    private static final int MAX_INPUT_SIZE = Integer.MAX_VALUE - BLOCK_SIZE;

    /** Digest instance shared by all window hashes; initialised in {@link #main}. */
    private static MessageDigest md;

    /**
     * Reads the input file fully into memory, prints every chunk boundary and the total time.
     *
     * @param args optional; {@code args[0]} is the file to chunk (defaults to the Ubuntu ISO)
     * @throws Exception if the file cannot be read or MD5 is unavailable
     */
    public static void main(String[] args) throws Exception {
        // Init
        md = MessageDigest.getInstance("MD5");
        String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        // Read the entire file to memory for simplicity
        byte[] input = readInput(filename);

        long startTime = System.nanoTime();

        // Start at the first position that has a full BLOCK_SIZE window behind it
        int currentPos = BLOCK_SIZE - 1;
        int currentStartPos = 0;
        while (currentPos < input.length) {
            if (shouldSplit(input, currentPos)) {
                System.out.println(currentPos + "," + (currentPos - currentStartPos + 1));
                currentStartPos = currentPos + 1;
                // Skip ahead so the next window lies entirely inside the new chunk
                currentPos += BLOCK_SIZE;
            } else {
                currentPos++;
            }
        }

        // Last chunk: only report it when bytes remain after the final split. This avoids
        // printing "-1,0" for an empty file or a zero-length chunk when a split lands on
        // the very last byte.
        if (currentStartPos < input.length) {
            int lastPos = input.length - 1;
            System.out.println(lastPos + "," + (lastPos - currentStartPos + 1));
        }

        long endTime = System.nanoTime();
        System.out.println("Finished in " + (endTime - startTime) / 1000000 + " ms");
    }

    /**
     * Reads the whole file into a byte array.
     *
     * @param filename path of the file to read
     * @return the complete file content
     * @throws IOException if the file is too large to hold in one array, cannot be read,
     *     or ends before the length reported by the file system has been read
     */
    private static byte[] readInput(String filename) throws IOException {
        File file = new File(filename);
        try (FileInputStream fis = new FileInputStream(file)) {
            long len = file.length();
            if (len > MAX_INPUT_SIZE) {
                throw new IOException("File too large to read into memory: " + len + " bytes");
            }
            byte[] content = new byte[(int) len];

            // A single read() may legally return fewer bytes than requested, so keep
            // reading until the buffer is full or the stream ends early.
            int offset = 0;
            while (offset < content.length) {
                int read = fis.read(content, offset, content.length - offset);
                if (read < 0) {
                    throw new IOException("Unexpected bytes read");
                }
                offset += read;
            }
            return content;
        }
    }

    /**
     * Decides whether {@code pos} is the last byte of a chunk.
     *
     * @param b   the input data
     * @param pos index of the last byte of the window; must be at least {@code BLOCK_SIZE - 1}
     * @return {@code true} when the window's MD5 digest matches the split pattern
     * @throws NoSuchAlgorithmException never thrown here; kept for signature compatibility
     */
    private static boolean shouldSplit(byte[] b, int pos) throws NoSuchAlgorithmException {
        md.reset();
        md.update(b, pos - (BLOCK_SIZE - 1), BLOCK_SIZE);
        byte[] result = md.digest();

        // Split when 23 bits of the digest are zero: the first two bytes plus the LOW
        // 7 bits of the third byte (the mask 0b1111111 ignores that byte's top bit).
        // Assuming uniformly distributed digests this is a 1-in-2^23 chance per position.
        return result[0] == 0 && result[1] == 0 && (result[2] & 0b1111111) == 0;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment