Last active
April 26, 2023 00:16
-
-
Save llamasoft/dc6a812f1574dfa232e6e49726331a97 to your computer and use it in GitHub Desktop.
Finding hashes that XOR to zero
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import os | |
import numpy as np | |
import galois | |
GF = galois.GF(2) | |
# This script works for any hash algorithm! | |
hash_algo = hashlib.sha256 | |
hash_name = hash_algo().name | |
hash_bytes = hash_algo().digest_size | |
hash_bits = hash_bytes * 8 | |
def byte_xor(a: bytes, b: bytes) -> bytes: | |
"""XORs two byte strings and returns the result.""" | |
return bytes(x ^ y for x, y in zip(a, b)) | |
def make_hashplain(plaintext_len=8) -> tuple[bytes, str]: | |
"""Returns a random hash and its plaintext input.""" | |
plain = os.urandom(plaintext_len//2).hex() | |
hash = hash_algo(plain.encode()).digest() | |
return (hash, plain) | |
# NOTE: the endianness doesn't matter here so long as the | |
# bits-to-list order matches the bytes-to-int order. | |
def hash_to_bitlist(hash: bytes) -> list: | |
"""Converts a hash into a list of 0s and 1s.""" | |
n = int.from_bytes(hash, "little") | |
return [1 if n & 1<<pos else 0 for pos in range(hash_bits)] | |
def bitlist_to_hash(bitlist: list) -> bytes: | |
"""Converts a list of 0s and 1s back into a hash.""" | |
n = sum(int(bit) << pos for pos, bit in enumerate(bitlist)) | |
return n.to_bytes(hash_bytes, "little") | |
# This would have saved me a lot of debugging. :( | |
test_hash = os.urandom(hash_bytes) | |
assert test_hash == bitlist_to_hash(hash_to_bitlist(test_hash)) | |
# Create a N-bit size system of linear equations, "A", | |
# where each input hash becomes a column of 0/1 bits. | |
# We will later use this to solve for "A*x = b" where | |
# "b" is some arbitrary target hash. | |
while True: | |
plaintext_lookup = dict(make_hashplain() for _ in range(hash_bits)) | |
# The transpose is required here because the list comprehension | |
# turned hashes into rows but we need them as columns. | |
A = GF([hash_to_bitlist(h) for h in plaintext_lookup.keys()]).transpose() | |
if np.linalg.matrix_rank(A) == hash_bits: | |
# If our matrix has a rank < hash_bits it means that | |
# it can't be used to construct arbitrary solutions. | |
# Plus, numpy will throw an exception. :P | |
break | |
# NOTE: even though our goal is to find a set of hashes that | |
# XOR to zero, we can't set the target itself to zero. | |
# If we do, the only solution we'll get is all zeroes | |
# (which represents XORing an empty set of hashes). | |
# Instead, we'll use a random target hash which will give us | |
# "target hash XOR some number of inputs hashes == zero". | |
target_hash, target_plain = make_hashplain() | |
target_bits = hash_to_bitlist(target_hash) | |
b = GF(target_bits) | |
# Solve "A*x = b" | |
x = np.linalg.solve(A, b) | |
# We now have a list of bits that tells us which hashes | |
# (i.e. columns of `A`) XOR together to give us our target hash. | |
input_hashes = [] | |
for column, bit in enumerate(x): | |
if int(bit): | |
input_column = A[...,column] | |
input_hash = bitlist_to_hash(input_column) | |
input_hashes.append(input_hash) | |
input_hashes.sort(key=lambda h: plaintext_lookup[h]) | |
# Display the results! | |
print(f"Found solution with {len(input_hashes)+1} inputs:") | |
print(f"{hash_name}({target_plain!r}) = {target_hash.hex()}") | |
temp = target_hash | |
for hash in input_hashes: | |
prev = temp | |
temp = byte_xor(temp, hash) | |
plain = plaintext_lookup[hash] | |
print(f"{hash_name}({plain!r}) ^ {prev.hex()} = {temp.hex()}") | |
assert temp == b"\0" * hash_bytes |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment