Skip to content

Instantly share code, notes, and snippets.

@cmrfrd
Last active October 2, 2023 18:55
Show Gist options
  • Save cmrfrd/21bad19b3ead7f21ca89d158615468cc to your computer and use it in GitHub Desktop.
Save cmrfrd/21bad19b3ead7f21ca89d158615468cc to your computer and use it in GitHub Desktop.
Gaussian Jordan Elim for finding bithash collision
import numpy as np
np.random.seed(256)
DTYPE=np.int64
P = 101
def is_singular(matrix: np.ndarray) -> bool:
return np.isclose(np.linalg.det(matrix), 0)
def add(x, y, m): return (x + y) % m
def sub(x, y, m): return (x - y) % m
def mul(x, y, m): return (x * y) % m
def inv(x, m):
g, a, _ = egcd(x, m)
if g != 1:
if m == 2: return 0
raise Exception(f"no inverse {x} mod {m}")
else:
return a % m
def egcd(a, b):
if a == 0:
return (b, 0, 1)
else:
g, y, x = egcd(b % a, a)
return (g, x - (b // a).astype(DTYPE) * y, y)
def gje_solution(A, b, m):
"""
Do gaussian-jordan elimination over mod field m and return some solution to the unknowns.
Algorithm based on: https://www.math.purdue.edu/~shao92/documents/Algorithm%20REF.pdf
"""
assert isinstance(m, int), "m should be an integer."
assert len(A.shape) == 2, "A should be 2-dimensional."
assert len(b.shape) == 1, "b should be 1-dimensional."
assert A.shape[0] == b.shape[0], "Number of equations in A != length of b."
assert A.dtype == b.dtype, "A and b should have the same dtype."
Ab = np.hstack((A, b.reshape(-1, 1)))
# to upper triangular
for i in range(len(b)):
# row swap
max_row = (np.argmax(np.abs(Ab[i:, i])) + i)
Ab[[i, max_row]] = Ab[[max_row, i]]
# row norm
Ab[i] = mul(Ab[i], inv(Ab[i, i], m), m)
# subtract multiple of current row from other rows
for j in range(i + 1, len(b)):
Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)
# to reduced row echelon form
for i in range(len(b) - 1, -1, -1):
for j in range(i - 1, -1, -1):
Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)
# check if no solution
all_zero = np.sum(Ab[:, :-1], axis=1) == 0
non_zero_result = Ab[:, -1] != 0
if np.any(all_zero & non_zero_result):
raise Exception("no solution")
# get single solution for (a, b, c, d)
num_equations, num_vars_plus_one = Ab.shape
num_vars = num_vars_plus_one - 1
x = np.zeros(num_vars, dtype=Ab.dtype)
for i in range(min(num_vars, num_equations)):
if np.all(Ab[i, :-1] == 0): # free variable
x[i] = 0
else: # pivot
x[i] = Ab[i, -1]
return Ab, x
# Example: 1
A = np.array([[-1, 2, 6, 7],
[3, -6, 0, -3],
[1, 0, 6, -1]])
b = np.array([15, -9, 5])
_, solution = gje_solution(A, b, P)
print("Solution1:\n", solution)
result = np.sum(mul(A, solution, P), axis=1) % P
assert np.all(result == b % P), "Solution is incorrect."
# Example: 2
A = np.array([[2, 1, 0, 2],
[0, 1, 0, 3],
[-1, 2, 0, 4],
[0, 1, 7, 2]])
b = np.array([4, 3, 3, 2])
_, solution = gje_solution(A, b, P)
print("Solution2:\n", solution)
result = np.sum(mul(A, solution, P), axis=1) % P
assert np.all(result == b % P), "Solution is incorrect."
# Example: 3
num_hashes = 100
hash_size = 32
A = np.random.randint(0, 2, size=(hash_size, num_hashes))
b = np.random.randint(0, 2, size=(hash_size,))
_, solution = gje_solution(A, b, 2)
print("Solution3:\n", solution)
hash_subset = A[:, solution.astype(bool)]
result = np.bitwise_xor.reduce(hash_subset, axis=1)
assert np.allclose(result, b), "Solution is incorrect."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment