llamasoft/hash_xor_zero.py

## hash_xor_zero.py
import hashlib
import os

import numpy as np
import galois

GF = galois.GF(2)

# This script works for any hash algorithm!
hash_algo = hashlib.sha256
hash_name = hash_algo().name
hash_bytes = hash_algo().digest_size
hash_bits = hash_bytes * 8

def byte_xor(a: bytes, b: bytes) -> bytes:
    """XORs two byte strings and returns the result."""
    return bytes(x ^ y for x, y in zip(a, b))

def make_hashplain(plaintext_len=8) -> tuple[bytes, str]:
    """Returns a random hash and its plaintext input."""
    plain = os.urandom(plaintext_len//2).hex()
    hash = hash_algo(plain.encode()).digest()
    return (hash, plain)

# NOTE: the endianness doesn't matter here so long as the
# bits-to-list order matches the bytes-to-int order.
def hash_to_bitlist(hash: bytes) -> list:
    """Converts a hash into a list of 0s and 1s."""
    n = int.from_bytes(hash, "little")
    return [1 if n & 1<<pos else 0 for pos in range(hash_bits)]

def bitlist_to_hash(bitlist: list) -> bytes:
    """Converts a list of 0s and 1s back into a hash."""
    n = sum(int(bit) << pos for pos, bit in enumerate(bitlist))
    return n.to_bytes(hash_bytes, "little")

# This would have saved me a lot of debugging. :(
test_hash = os.urandom(hash_bytes)
assert test_hash == bitlist_to_hash(hash_to_bitlist(test_hash))

# Create a N-bit size system of linear equations, "A",
# where each input hash becomes a column of 0/1 bits.
# We will later use this to solve for "A*x = b" where
# "b" is some arbitrary target hash.
while True:
    plaintext_lookup = dict(make_hashplain() for _ in range(hash_bits))
    # The transpose is required here because the list comprehension
    # turned hashes into rows but we need them as columns.
    A = GF([hash_to_bitlist(h) for h in plaintext_lookup.keys()]).transpose()
    if np.linalg.matrix_rank(A) == hash_bits:
        # If our matrix has a rank < hash_bits it means that
        # it can't be used to construct arbitrary solutions.
        # Plus, numpy will throw an exception. :P
        break

# NOTE: even though our goal is to find a set of hashes that
# XOR to zero, we can't set the target itself to zero.
# If we do, the only solution we'll get is all zeroes
# (which represents XORing an empty set of hashes).
# Instead, we'll use a random target hash which will give us
# "target hash XOR some number of inputs hashes == zero".
target_hash, target_plain = make_hashplain()
target_bits = hash_to_bitlist(target_hash)
b = GF(target_bits)

# Solve "A*x = b"
x = np.linalg.solve(A, b)

# We now have a list of bits that tells us which hashes
# (i.e. columns of `A`) XOR together to give us our target hash.
input_hashes = []
for column, bit in enumerate(x):
    if int(bit):
        input_column = A[...,column]
        input_hash = bitlist_to_hash(input_column)
        input_hashes.append(input_hash)
input_hashes.sort(key=lambda h: plaintext_lookup[h])

# Display the results!
print(f"Found solution with {len(input_hashes)+1} inputs:")
print(f"{hash_name}({target_plain!r}) = {target_hash.hex()}")
temp = target_hash
for hash in input_hashes:
    prev = temp
    temp = byte_xor(temp, hash)
    plain = plaintext_lookup[hash]
    print(f"{hash_name}({plain!r}) ^ {prev.hex()} = {temp.hex()}")

assert temp == b"\0" * hash_bytes
	import hashlib
	import os

	import numpy as np
	import galois

	GF = galois.GF(2)

	# This script works for any hash algorithm!
	hash_algo = hashlib.sha256
	hash_name = hash_algo().name
	hash_bytes = hash_algo().digest_size
	hash_bits = hash_bytes * 8

	def byte_xor(a: bytes, b: bytes) -> bytes:
	"""XORs two byte strings and returns the result."""
	return bytes(x ^ y for x, y in zip(a, b))

	def make_hashplain(plaintext_len=8) -> tuple[bytes, str]:
	"""Returns a random hash and its plaintext input."""
	plain = os.urandom(plaintext_len//2).hex()
	hash = hash_algo(plain.encode()).digest()
	return (hash, plain)

	# NOTE: the endianness doesn't matter here so long as the
	# bits-to-list order matches the bytes-to-int order.
	def hash_to_bitlist(hash: bytes) -> list:
	"""Converts a hash into a list of 0s and 1s."""
	n = int.from_bytes(hash, "little")
	return [1 if n & 1<<pos else 0 for pos in range(hash_bits)]

	def bitlist_to_hash(bitlist: list) -> bytes:
	"""Converts a list of 0s and 1s back into a hash."""
	n = sum(int(bit) << pos for pos, bit in enumerate(bitlist))
	return n.to_bytes(hash_bytes, "little")

	# This would have saved me a lot of debugging. :(
	test_hash = os.urandom(hash_bytes)
	assert test_hash == bitlist_to_hash(hash_to_bitlist(test_hash))

	# Create a N-bit size system of linear equations, "A",
	# where each input hash becomes a column of 0/1 bits.
	# We will later use this to solve for "A*x = b" where
	# "b" is some arbitrary target hash.
	while True:
	plaintext_lookup = dict(make_hashplain() for _ in range(hash_bits))
	# The transpose is required here because the list comprehension
	# turned hashes into rows but we need them as columns.
	A = GF([hash_to_bitlist(h) for h in plaintext_lookup.keys()]).transpose()
	if np.linalg.matrix_rank(A) == hash_bits:
	# If our matrix has a rank < hash_bits it means that
	# it can't be used to construct arbitrary solutions.
	# Plus, numpy will throw an exception. :P
	break

	# NOTE: even though our goal is to find a set of hashes that
	# XOR to zero, we can't set the target itself to zero.
	# If we do, the only solution we'll get is all zeroes
	# (which represents XORing an empty set of hashes).
	# Instead, we'll use a random target hash which will give us
	# "target hash XOR some number of inputs hashes == zero".
	target_hash, target_plain = make_hashplain()
	target_bits = hash_to_bitlist(target_hash)
	b = GF(target_bits)

	# Solve "A*x = b"
	x = np.linalg.solve(A, b)

	# We now have a list of bits that tells us which hashes
	# (i.e. columns of `A`) XOR together to give us our target hash.
	input_hashes = []
	for column, bit in enumerate(x):
	if int(bit):
	input_column = A[...,column]
	input_hash = bitlist_to_hash(input_column)
	input_hashes.append(input_hash)
	input_hashes.sort(key=lambda h: plaintext_lookup[h])

	# Display the results!
	print(f"Found solution with {len(input_hashes)+1} inputs:")
	print(f"{hash_name}({target_plain!r}) = {target_hash.hex()}")
	temp = target_hash
	for hash in input_hashes:
	prev = temp
	temp = byte_xor(temp, hash)
	plain = plaintext_lookup[hash]
	print(f"{hash_name}({plain!r}) ^ {prev.hex()} = {temp.hex()}")

	assert temp == b"\0" * hash_bytes